Make the health check enable the completion check when chat has failed
This commit is contained in:
parent
9b08f1fcee
commit
cc5711bb5e
4 changed files with 151 additions and 110 deletions
|
|
@ -1,136 +1,153 @@
|
|||
import {
|
||||
type UseQueryOptions,
|
||||
useQuery,
|
||||
useQueryClient,
|
||||
type UseQueryOptions,
|
||||
useQuery,
|
||||
useQueryClient,
|
||||
} from "@tanstack/react-query";
|
||||
import { useChat } from "@/contexts/chat-context";
|
||||
import { useGetSettingsQuery } from "./useGetSettingsQuery";
|
||||
|
||||
export interface ProviderHealthDetails {
|
||||
llm_model: string;
|
||||
embedding_model: string;
|
||||
endpoint?: string | null;
|
||||
llm_model: string;
|
||||
embedding_model: string;
|
||||
endpoint?: string | null;
|
||||
}
|
||||
|
||||
export interface ProviderHealthResponse {
|
||||
status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
|
||||
message: string;
|
||||
provider?: string;
|
||||
llm_provider?: string;
|
||||
embedding_provider?: string;
|
||||
llm_error?: string | null;
|
||||
embedding_error?: string | null;
|
||||
details?: ProviderHealthDetails;
|
||||
status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
|
||||
message: string;
|
||||
provider?: string;
|
||||
llm_provider?: string;
|
||||
embedding_provider?: string;
|
||||
llm_error?: string | null;
|
||||
embedding_error?: string | null;
|
||||
details?: ProviderHealthDetails;
|
||||
}
|
||||
|
||||
export interface ProviderHealthParams {
|
||||
provider?: "openai" | "ollama" | "watsonx";
|
||||
provider?: "openai" | "ollama" | "watsonx";
|
||||
test_completion?: boolean;
|
||||
}
|
||||
|
||||
// Track consecutive failures for exponential backoff
|
||||
const failureCountMap = new Map<string, number>();
|
||||
|
||||
export const useProviderHealthQuery = (
|
||||
params?: ProviderHealthParams,
|
||||
options?: Omit<
|
||||
UseQueryOptions<ProviderHealthResponse, Error>,
|
||||
"queryKey" | "queryFn"
|
||||
>,
|
||||
params?: ProviderHealthParams,
|
||||
options?: Omit<
|
||||
UseQueryOptions<ProviderHealthResponse, Error>,
|
||||
"queryKey" | "queryFn"
|
||||
>,
|
||||
) => {
|
||||
const queryClient = useQueryClient();
|
||||
const queryClient = useQueryClient();
|
||||
|
||||
const { data: settings = {} } = useGetSettingsQuery();
|
||||
// Get chat error state from context (ChatProvider wraps the entire app in layout.tsx)
|
||||
const { hasChatError, setChatError } = useChat();
|
||||
|
||||
async function checkProviderHealth(): Promise<ProviderHealthResponse> {
|
||||
try {
|
||||
const url = new URL("/api/provider/health", window.location.origin);
|
||||
const { data: settings = {} } = useGetSettingsQuery();
|
||||
|
||||
// Add provider query param if specified
|
||||
if (params?.provider) {
|
||||
url.searchParams.set("provider", params.provider);
|
||||
}
|
||||
async function checkProviderHealth(): Promise<ProviderHealthResponse> {
|
||||
try {
|
||||
const url = new URL("/api/provider/health", window.location.origin);
|
||||
|
||||
const response = await fetch(url.toString());
|
||||
// Add provider query param if specified
|
||||
if (params?.provider) {
|
||||
url.searchParams.set("provider", params.provider);
|
||||
}
|
||||
|
||||
if (response.ok) {
|
||||
return await response.json();
|
||||
} else if (response.status === 503) {
|
||||
// Backend is up but provider validation failed
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
return {
|
||||
status: "unhealthy",
|
||||
message: errorData.message || "Provider validation failed",
|
||||
provider: errorData.provider || params?.provider || "unknown",
|
||||
llm_provider: errorData.llm_provider,
|
||||
embedding_provider: errorData.embedding_provider,
|
||||
llm_error: errorData.llm_error,
|
||||
embedding_error: errorData.embedding_error,
|
||||
details: errorData.details,
|
||||
};
|
||||
} else {
|
||||
// Other backend errors (400, etc.) - treat as provider issues
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
return {
|
||||
status: "error",
|
||||
message: errorData.message || "Failed to check provider health",
|
||||
provider: errorData.provider || params?.provider || "unknown",
|
||||
llm_provider: errorData.llm_provider,
|
||||
embedding_provider: errorData.embedding_provider,
|
||||
llm_error: errorData.llm_error,
|
||||
embedding_error: errorData.embedding_error,
|
||||
details: errorData.details,
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
// Network error - backend is likely down, don't show provider banner
|
||||
return {
|
||||
status: "backend-unavailable",
|
||||
message: error instanceof Error ? error.message : "Connection failed",
|
||||
provider: params?.provider || "unknown",
|
||||
};
|
||||
}
|
||||
}
|
||||
// Add test_completion query param if specified or if chat error exists
|
||||
const testCompletion = params?.test_completion ?? hasChatError;
|
||||
if (testCompletion) {
|
||||
url.searchParams.set("test_completion", "true");
|
||||
}
|
||||
|
||||
const queryKey = ["provider", "health"];
|
||||
const failureCountKey = queryKey.join("-");
|
||||
const response = await fetch(url.toString());
|
||||
|
||||
const queryResult = useQuery(
|
||||
{
|
||||
queryKey,
|
||||
queryFn: checkProviderHealth,
|
||||
retry: false, // Don't retry health checks automatically
|
||||
refetchInterval: (query) => {
|
||||
const data = query.state.data;
|
||||
const status = data?.status;
|
||||
|
||||
// If healthy, reset failure count and check every 30 seconds
|
||||
if (status === "healthy") {
|
||||
failureCountMap.set(failureCountKey, 0);
|
||||
return 30000;
|
||||
}
|
||||
|
||||
// If backend unavailable, use moderate polling
|
||||
if (status === "backend-unavailable") {
|
||||
return 15000;
|
||||
}
|
||||
|
||||
// For unhealthy/error status, use exponential backoff
|
||||
const currentFailures = failureCountMap.get(failureCountKey) || 0;
|
||||
failureCountMap.set(failureCountKey, currentFailures + 1);
|
||||
|
||||
// Exponential backoff: 5s, 10s, 20s, then 30s
|
||||
const backoffDelays = [5000, 10000, 20000, 30000];
|
||||
const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
|
||||
|
||||
return delay;
|
||||
},
|
||||
refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
|
||||
refetchOnMount: true,
|
||||
staleTime: 30000, // Consider data stale after 30 seconds
|
||||
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
|
||||
...options,
|
||||
},
|
||||
queryClient,
|
||||
);
|
||||
if (response.ok) {
|
||||
return await response.json();
|
||||
} else if (response.status === 503) {
|
||||
// Backend is up but provider validation failed
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
return {
|
||||
status: "unhealthy",
|
||||
message: errorData.message || "Provider validation failed",
|
||||
provider: errorData.provider || params?.provider || "unknown",
|
||||
llm_provider: errorData.llm_provider,
|
||||
embedding_provider: errorData.embedding_provider,
|
||||
llm_error: errorData.llm_error,
|
||||
embedding_error: errorData.embedding_error,
|
||||
details: errorData.details,
|
||||
};
|
||||
} else {
|
||||
// Other backend errors (400, etc.) - treat as provider issues
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
return {
|
||||
status: "error",
|
||||
message: errorData.message || "Failed to check provider health",
|
||||
provider: errorData.provider || params?.provider || "unknown",
|
||||
llm_provider: errorData.llm_provider,
|
||||
embedding_provider: errorData.embedding_provider,
|
||||
llm_error: errorData.llm_error,
|
||||
embedding_error: errorData.embedding_error,
|
||||
details: errorData.details,
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
// Network error - backend is likely down, don't show provider banner
|
||||
return {
|
||||
status: "backend-unavailable",
|
||||
message: error instanceof Error ? error.message : "Connection failed",
|
||||
provider: params?.provider || "unknown",
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return queryResult;
|
||||
const queryKey = ["provider", "health", params?.test_completion];
|
||||
const failureCountKey = queryKey.join("-");
|
||||
|
||||
const queryResult = useQuery(
|
||||
{
|
||||
queryKey,
|
||||
queryFn: checkProviderHealth,
|
||||
retry: false, // Don't retry health checks automatically
|
||||
refetchInterval: (query) => {
|
||||
const data = query.state.data;
|
||||
const status = data?.status;
|
||||
|
||||
// If healthy, reset failure count and check every 30 seconds
|
||||
// Also reset chat error flag if we're using test_completion=true and it succeeded
|
||||
if (status === "healthy") {
|
||||
failureCountMap.set(failureCountKey, 0);
|
||||
// If we were checking with test_completion=true due to chat errors, reset the flag
|
||||
if (hasChatError && setChatError) {
|
||||
setChatError(false);
|
||||
}
|
||||
return 30000;
|
||||
}
|
||||
|
||||
// If backend unavailable, use moderate polling
|
||||
if (status === "backend-unavailable") {
|
||||
return 15000;
|
||||
}
|
||||
|
||||
// For unhealthy/error status, use exponential backoff
|
||||
const currentFailures = failureCountMap.get(failureCountKey) || 0;
|
||||
failureCountMap.set(failureCountKey, currentFailures + 1);
|
||||
|
||||
// Exponential backoff: 5s, 10s, 20s, then 30s
|
||||
const backoffDelays = [5000, 10000, 20000, 30000];
|
||||
const delay =
|
||||
backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
|
||||
|
||||
return delay;
|
||||
},
|
||||
refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
|
||||
refetchOnMount: true,
|
||||
staleTime: 30000, // Consider data stale after 30 seconds
|
||||
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
|
||||
...options,
|
||||
},
|
||||
queryClient,
|
||||
);
|
||||
|
||||
return queryResult;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ function ChatPage() {
|
|||
]);
|
||||
const [input, setInput] = useState("");
|
||||
const { loading, setLoading } = useLoadingStore();
|
||||
const { setChatError } = useChat();
|
||||
const [asyncMode, setAsyncMode] = useState(true);
|
||||
const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
|
||||
Set<string>
|
||||
|
|
@ -123,6 +124,8 @@ function ChatPage() {
|
|||
console.error("Streaming error:", error);
|
||||
setLoading(false);
|
||||
setWaitingTooLong(false);
|
||||
// Set chat error flag to trigger test_completion=true on health checks
|
||||
setChatError(true);
|
||||
const errorMessage: Message = {
|
||||
role: "assistant",
|
||||
content:
|
||||
|
|
@ -197,6 +200,11 @@ function ChatPage() {
|
|||
const result = await response.json();
|
||||
console.log("Upload result:", result);
|
||||
|
||||
if (!response.ok) {
|
||||
// Set chat error flag if upload fails
|
||||
setChatError(true);
|
||||
}
|
||||
|
||||
if (response.status === 201) {
|
||||
// New flow: Got task ID, start tracking with centralized system
|
||||
const taskId = result.task_id || result.id;
|
||||
|
|
@ -255,6 +263,8 @@ function ChatPage() {
|
|||
}
|
||||
} catch (error) {
|
||||
console.error("Upload failed:", error);
|
||||
// Set chat error flag to trigger test_completion=true on health checks
|
||||
setChatError(true);
|
||||
const errorMessage: Message = {
|
||||
role: "assistant",
|
||||
content: `❌ Failed to process document. Please try again.`,
|
||||
|
|
@ -858,6 +868,8 @@ function ChatPage() {
|
|||
}
|
||||
} else {
|
||||
console.error("Chat failed:", result.error);
|
||||
// Set chat error flag to trigger test_completion=true on health checks
|
||||
setChatError(true);
|
||||
const errorMessage: Message = {
|
||||
role: "assistant",
|
||||
content: "Sorry, I encountered an error. Please try again.",
|
||||
|
|
@ -867,6 +879,8 @@ function ChatPage() {
|
|||
}
|
||||
} catch (error) {
|
||||
console.error("Chat error:", error);
|
||||
// Set chat error flag to trigger test_completion=true on health checks
|
||||
setChatError(true);
|
||||
const errorMessage: Message = {
|
||||
role: "assistant",
|
||||
content:
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { useProviderHealthQuery } from "@/app/api/queries/useProviderHealthQuery
|
|||
import type { ModelProvider } from "@/app/settings/_helpers/model-helpers";
|
||||
import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { useChat } from "@/contexts/chat-context";
|
||||
import { Button } from "./ui/button";
|
||||
|
||||
interface ProviderHealthBannerProps {
|
||||
|
|
@ -14,13 +15,16 @@ interface ProviderHealthBannerProps {
|
|||
|
||||
// Custom hook to check provider health status
|
||||
export function useProviderHealth() {
|
||||
const { hasChatError } = useChat();
|
||||
const {
|
||||
data: health,
|
||||
isLoading,
|
||||
isFetching,
|
||||
error,
|
||||
isError,
|
||||
} = useProviderHealthQuery();
|
||||
} = useProviderHealthQuery({
|
||||
test_completion: hasChatError, // Use test_completion=true when chat errors occur
|
||||
});
|
||||
|
||||
const isHealthy = health?.status === "healthy" && !isError;
|
||||
// Only consider unhealthy if backend is up but provider validation failed
|
||||
|
|
|
|||
|
|
@ -79,6 +79,8 @@ interface ChatContextType {
|
|||
conversationFilter: KnowledgeFilter | null;
|
||||
// responseId: undefined = use currentConversationId, null = don't save to localStorage
|
||||
setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void;
|
||||
hasChatError: boolean;
|
||||
setChatError: (hasError: boolean) => void;
|
||||
}
|
||||
|
||||
const ChatContext = createContext<ChatContextType | undefined>(undefined);
|
||||
|
|
@ -108,6 +110,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
|
|||
const [conversationLoaded, setConversationLoaded] = useState(false);
|
||||
const [conversationFilter, setConversationFilterState] =
|
||||
useState<KnowledgeFilter | null>(null);
|
||||
const [hasChatError, setChatError] = useState(false);
|
||||
|
||||
// Debounce refresh requests to prevent excessive reloads
|
||||
const refreshTimeoutRef = useRef<NodeJS.Timeout | null>(null);
|
||||
|
|
@ -358,6 +361,8 @@ export function ChatProvider({ children }: ChatProviderProps) {
|
|||
setConversationLoaded,
|
||||
conversationFilter,
|
||||
setConversationFilter,
|
||||
hasChatError,
|
||||
setChatError,
|
||||
}),
|
||||
[
|
||||
endpoint,
|
||||
|
|
@ -378,6 +383,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
|
|||
conversationLoaded,
|
||||
conversationFilter,
|
||||
setConversationFilter,
|
||||
hasChatError,
|
||||
],
|
||||
);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue