Make the health check enable the completion check when chat has failed
This commit is contained in:
parent
9b08f1fcee
commit
cc5711bb5e
4 changed files with 151 additions and 110 deletions
|
|
@ -1,136 +1,153 @@
|
||||||
import {
|
import {
|
||||||
type UseQueryOptions,
|
type UseQueryOptions,
|
||||||
useQuery,
|
useQuery,
|
||||||
useQueryClient,
|
useQueryClient,
|
||||||
} from "@tanstack/react-query";
|
} from "@tanstack/react-query";
|
||||||
|
import { useChat } from "@/contexts/chat-context";
|
||||||
import { useGetSettingsQuery } from "./useGetSettingsQuery";
|
import { useGetSettingsQuery } from "./useGetSettingsQuery";
|
||||||
|
|
||||||
export interface ProviderHealthDetails {
|
export interface ProviderHealthDetails {
|
||||||
llm_model: string;
|
llm_model: string;
|
||||||
embedding_model: string;
|
embedding_model: string;
|
||||||
endpoint?: string | null;
|
endpoint?: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ProviderHealthResponse {
|
export interface ProviderHealthResponse {
|
||||||
status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
|
status: "healthy" | "unhealthy" | "error" | "backend-unavailable";
|
||||||
message: string;
|
message: string;
|
||||||
provider?: string;
|
provider?: string;
|
||||||
llm_provider?: string;
|
llm_provider?: string;
|
||||||
embedding_provider?: string;
|
embedding_provider?: string;
|
||||||
llm_error?: string | null;
|
llm_error?: string | null;
|
||||||
embedding_error?: string | null;
|
embedding_error?: string | null;
|
||||||
details?: ProviderHealthDetails;
|
details?: ProviderHealthDetails;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ProviderHealthParams {
|
export interface ProviderHealthParams {
|
||||||
provider?: "openai" | "ollama" | "watsonx";
|
provider?: "openai" | "ollama" | "watsonx";
|
||||||
|
test_completion?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track consecutive failures for exponential backoff
|
// Track consecutive failures for exponential backoff
|
||||||
const failureCountMap = new Map<string, number>();
|
const failureCountMap = new Map<string, number>();
|
||||||
|
|
||||||
export const useProviderHealthQuery = (
|
export const useProviderHealthQuery = (
|
||||||
params?: ProviderHealthParams,
|
params?: ProviderHealthParams,
|
||||||
options?: Omit<
|
options?: Omit<
|
||||||
UseQueryOptions<ProviderHealthResponse, Error>,
|
UseQueryOptions<ProviderHealthResponse, Error>,
|
||||||
"queryKey" | "queryFn"
|
"queryKey" | "queryFn"
|
||||||
>,
|
>,
|
||||||
) => {
|
) => {
|
||||||
const queryClient = useQueryClient();
|
const queryClient = useQueryClient();
|
||||||
|
|
||||||
const { data: settings = {} } = useGetSettingsQuery();
|
// Get chat error state from context (ChatProvider wraps the entire app in layout.tsx)
|
||||||
|
const { hasChatError, setChatError } = useChat();
|
||||||
|
|
||||||
async function checkProviderHealth(): Promise<ProviderHealthResponse> {
|
const { data: settings = {} } = useGetSettingsQuery();
|
||||||
try {
|
|
||||||
const url = new URL("/api/provider/health", window.location.origin);
|
|
||||||
|
|
||||||
// Add provider query param if specified
|
async function checkProviderHealth(): Promise<ProviderHealthResponse> {
|
||||||
if (params?.provider) {
|
try {
|
||||||
url.searchParams.set("provider", params.provider);
|
const url = new URL("/api/provider/health", window.location.origin);
|
||||||
}
|
|
||||||
|
|
||||||
const response = await fetch(url.toString());
|
// Add provider query param if specified
|
||||||
|
if (params?.provider) {
|
||||||
|
url.searchParams.set("provider", params.provider);
|
||||||
|
}
|
||||||
|
|
||||||
if (response.ok) {
|
// Add test_completion query param if specified or if chat error exists
|
||||||
return await response.json();
|
const testCompletion = params?.test_completion ?? hasChatError;
|
||||||
} else if (response.status === 503) {
|
if (testCompletion) {
|
||||||
// Backend is up but provider validation failed
|
url.searchParams.set("test_completion", "true");
|
||||||
const errorData = await response.json().catch(() => ({}));
|
}
|
||||||
return {
|
|
||||||
status: "unhealthy",
|
|
||||||
message: errorData.message || "Provider validation failed",
|
|
||||||
provider: errorData.provider || params?.provider || "unknown",
|
|
||||||
llm_provider: errorData.llm_provider,
|
|
||||||
embedding_provider: errorData.embedding_provider,
|
|
||||||
llm_error: errorData.llm_error,
|
|
||||||
embedding_error: errorData.embedding_error,
|
|
||||||
details: errorData.details,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
// Other backend errors (400, etc.) - treat as provider issues
|
|
||||||
const errorData = await response.json().catch(() => ({}));
|
|
||||||
return {
|
|
||||||
status: "error",
|
|
||||||
message: errorData.message || "Failed to check provider health",
|
|
||||||
provider: errorData.provider || params?.provider || "unknown",
|
|
||||||
llm_provider: errorData.llm_provider,
|
|
||||||
embedding_provider: errorData.embedding_provider,
|
|
||||||
llm_error: errorData.llm_error,
|
|
||||||
embedding_error: errorData.embedding_error,
|
|
||||||
details: errorData.details,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
// Network error - backend is likely down, don't show provider banner
|
|
||||||
return {
|
|
||||||
status: "backend-unavailable",
|
|
||||||
message: error instanceof Error ? error.message : "Connection failed",
|
|
||||||
provider: params?.provider || "unknown",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const queryKey = ["provider", "health"];
|
const response = await fetch(url.toString());
|
||||||
const failureCountKey = queryKey.join("-");
|
|
||||||
|
|
||||||
const queryResult = useQuery(
|
if (response.ok) {
|
||||||
{
|
return await response.json();
|
||||||
queryKey,
|
} else if (response.status === 503) {
|
||||||
queryFn: checkProviderHealth,
|
// Backend is up but provider validation failed
|
||||||
retry: false, // Don't retry health checks automatically
|
const errorData = await response.json().catch(() => ({}));
|
||||||
refetchInterval: (query) => {
|
return {
|
||||||
const data = query.state.data;
|
status: "unhealthy",
|
||||||
const status = data?.status;
|
message: errorData.message || "Provider validation failed",
|
||||||
|
provider: errorData.provider || params?.provider || "unknown",
|
||||||
// If healthy, reset failure count and check every 30 seconds
|
llm_provider: errorData.llm_provider,
|
||||||
if (status === "healthy") {
|
embedding_provider: errorData.embedding_provider,
|
||||||
failureCountMap.set(failureCountKey, 0);
|
llm_error: errorData.llm_error,
|
||||||
return 30000;
|
embedding_error: errorData.embedding_error,
|
||||||
}
|
details: errorData.details,
|
||||||
|
};
|
||||||
// If backend unavailable, use moderate polling
|
} else {
|
||||||
if (status === "backend-unavailable") {
|
// Other backend errors (400, etc.) - treat as provider issues
|
||||||
return 15000;
|
const errorData = await response.json().catch(() => ({}));
|
||||||
}
|
return {
|
||||||
|
status: "error",
|
||||||
// For unhealthy/error status, use exponential backoff
|
message: errorData.message || "Failed to check provider health",
|
||||||
const currentFailures = failureCountMap.get(failureCountKey) || 0;
|
provider: errorData.provider || params?.provider || "unknown",
|
||||||
failureCountMap.set(failureCountKey, currentFailures + 1);
|
llm_provider: errorData.llm_provider,
|
||||||
|
embedding_provider: errorData.embedding_provider,
|
||||||
// Exponential backoff: 5s, 10s, 20s, then 30s
|
llm_error: errorData.llm_error,
|
||||||
const backoffDelays = [5000, 10000, 20000, 30000];
|
embedding_error: errorData.embedding_error,
|
||||||
const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
|
details: errorData.details,
|
||||||
|
};
|
||||||
return delay;
|
}
|
||||||
},
|
} catch (error) {
|
||||||
refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
|
// Network error - backend is likely down, don't show provider banner
|
||||||
refetchOnMount: true,
|
return {
|
||||||
staleTime: 30000, // Consider data stale after 30 seconds
|
status: "backend-unavailable",
|
||||||
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
|
message: error instanceof Error ? error.message : "Connection failed",
|
||||||
...options,
|
provider: params?.provider || "unknown",
|
||||||
},
|
};
|
||||||
queryClient,
|
}
|
||||||
);
|
}
|
||||||
|
|
||||||
return queryResult;
|
const queryKey = ["provider", "health", params?.test_completion];
|
||||||
|
const failureCountKey = queryKey.join("-");
|
||||||
|
|
||||||
|
const queryResult = useQuery(
|
||||||
|
{
|
||||||
|
queryKey,
|
||||||
|
queryFn: checkProviderHealth,
|
||||||
|
retry: false, // Don't retry health checks automatically
|
||||||
|
refetchInterval: (query) => {
|
||||||
|
const data = query.state.data;
|
||||||
|
const status = data?.status;
|
||||||
|
|
||||||
|
// If healthy, reset failure count and check every 30 seconds
|
||||||
|
// Also reset chat error flag if we're using test_completion=true and it succeeded
|
||||||
|
if (status === "healthy") {
|
||||||
|
failureCountMap.set(failureCountKey, 0);
|
||||||
|
// If we were checking with test_completion=true due to chat errors, reset the flag
|
||||||
|
if (hasChatError && setChatError) {
|
||||||
|
setChatError(false);
|
||||||
|
}
|
||||||
|
return 30000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If backend unavailable, use moderate polling
|
||||||
|
if (status === "backend-unavailable") {
|
||||||
|
return 15000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For unhealthy/error status, use exponential backoff
|
||||||
|
const currentFailures = failureCountMap.get(failureCountKey) || 0;
|
||||||
|
failureCountMap.set(failureCountKey, currentFailures + 1);
|
||||||
|
|
||||||
|
// Exponential backoff: 5s, 10s, 20s, then 30s
|
||||||
|
const backoffDelays = [5000, 10000, 20000, 30000];
|
||||||
|
const delay =
|
||||||
|
backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
|
||||||
|
|
||||||
|
return delay;
|
||||||
|
},
|
||||||
|
refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
|
||||||
|
refetchOnMount: true,
|
||||||
|
staleTime: 30000, // Consider data stale after 30 seconds
|
||||||
|
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
|
||||||
|
...options,
|
||||||
|
},
|
||||||
|
queryClient,
|
||||||
|
);
|
||||||
|
|
||||||
|
return queryResult;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@ function ChatPage() {
|
||||||
]);
|
]);
|
||||||
const [input, setInput] = useState("");
|
const [input, setInput] = useState("");
|
||||||
const { loading, setLoading } = useLoadingStore();
|
const { loading, setLoading } = useLoadingStore();
|
||||||
|
const { setChatError } = useChat();
|
||||||
const [asyncMode, setAsyncMode] = useState(true);
|
const [asyncMode, setAsyncMode] = useState(true);
|
||||||
const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
|
const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
|
||||||
Set<string>
|
Set<string>
|
||||||
|
|
@ -123,6 +124,8 @@ function ChatPage() {
|
||||||
console.error("Streaming error:", error);
|
console.error("Streaming error:", error);
|
||||||
setLoading(false);
|
setLoading(false);
|
||||||
setWaitingTooLong(false);
|
setWaitingTooLong(false);
|
||||||
|
// Set chat error flag to trigger test_completion=true on health checks
|
||||||
|
setChatError(true);
|
||||||
const errorMessage: Message = {
|
const errorMessage: Message = {
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content:
|
content:
|
||||||
|
|
@ -197,6 +200,11 @@ function ChatPage() {
|
||||||
const result = await response.json();
|
const result = await response.json();
|
||||||
console.log("Upload result:", result);
|
console.log("Upload result:", result);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
// Set chat error flag if upload fails
|
||||||
|
setChatError(true);
|
||||||
|
}
|
||||||
|
|
||||||
if (response.status === 201) {
|
if (response.status === 201) {
|
||||||
// New flow: Got task ID, start tracking with centralized system
|
// New flow: Got task ID, start tracking with centralized system
|
||||||
const taskId = result.task_id || result.id;
|
const taskId = result.task_id || result.id;
|
||||||
|
|
@ -255,6 +263,8 @@ function ChatPage() {
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Upload failed:", error);
|
console.error("Upload failed:", error);
|
||||||
|
// Set chat error flag to trigger test_completion=true on health checks
|
||||||
|
setChatError(true);
|
||||||
const errorMessage: Message = {
|
const errorMessage: Message = {
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content: `❌ Failed to process document. Please try again.`,
|
content: `❌ Failed to process document. Please try again.`,
|
||||||
|
|
@ -858,6 +868,8 @@ function ChatPage() {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
console.error("Chat failed:", result.error);
|
console.error("Chat failed:", result.error);
|
||||||
|
// Set chat error flag to trigger test_completion=true on health checks
|
||||||
|
setChatError(true);
|
||||||
const errorMessage: Message = {
|
const errorMessage: Message = {
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content: "Sorry, I encountered an error. Please try again.",
|
content: "Sorry, I encountered an error. Please try again.",
|
||||||
|
|
@ -867,6 +879,8 @@ function ChatPage() {
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Chat error:", error);
|
console.error("Chat error:", error);
|
||||||
|
// Set chat error flag to trigger test_completion=true on health checks
|
||||||
|
setChatError(true);
|
||||||
const errorMessage: Message = {
|
const errorMessage: Message = {
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content:
|
content:
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import { useProviderHealthQuery } from "@/app/api/queries/useProviderHealthQuery
|
||||||
import type { ModelProvider } from "@/app/settings/_helpers/model-helpers";
|
import type { ModelProvider } from "@/app/settings/_helpers/model-helpers";
|
||||||
import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner";
|
import { Banner, BannerIcon, BannerTitle } from "@/components/ui/banner";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
import { useChat } from "@/contexts/chat-context";
|
||||||
import { Button } from "./ui/button";
|
import { Button } from "./ui/button";
|
||||||
|
|
||||||
interface ProviderHealthBannerProps {
|
interface ProviderHealthBannerProps {
|
||||||
|
|
@ -14,13 +15,16 @@ interface ProviderHealthBannerProps {
|
||||||
|
|
||||||
// Custom hook to check provider health status
|
// Custom hook to check provider health status
|
||||||
export function useProviderHealth() {
|
export function useProviderHealth() {
|
||||||
|
const { hasChatError } = useChat();
|
||||||
const {
|
const {
|
||||||
data: health,
|
data: health,
|
||||||
isLoading,
|
isLoading,
|
||||||
isFetching,
|
isFetching,
|
||||||
error,
|
error,
|
||||||
isError,
|
isError,
|
||||||
} = useProviderHealthQuery();
|
} = useProviderHealthQuery({
|
||||||
|
test_completion: hasChatError, // Use test_completion=true when chat errors occur
|
||||||
|
});
|
||||||
|
|
||||||
const isHealthy = health?.status === "healthy" && !isError;
|
const isHealthy = health?.status === "healthy" && !isError;
|
||||||
// Only consider unhealthy if backend is up but provider validation failed
|
// Only consider unhealthy if backend is up but provider validation failed
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,8 @@ interface ChatContextType {
|
||||||
conversationFilter: KnowledgeFilter | null;
|
conversationFilter: KnowledgeFilter | null;
|
||||||
// responseId: undefined = use currentConversationId, null = don't save to localStorage
|
// responseId: undefined = use currentConversationId, null = don't save to localStorage
|
||||||
setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void;
|
setConversationFilter: (filter: KnowledgeFilter | null, responseId?: string | null) => void;
|
||||||
|
hasChatError: boolean;
|
||||||
|
setChatError: (hasError: boolean) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ChatContext = createContext<ChatContextType | undefined>(undefined);
|
const ChatContext = createContext<ChatContextType | undefined>(undefined);
|
||||||
|
|
@ -108,6 +110,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
|
||||||
const [conversationLoaded, setConversationLoaded] = useState(false);
|
const [conversationLoaded, setConversationLoaded] = useState(false);
|
||||||
const [conversationFilter, setConversationFilterState] =
|
const [conversationFilter, setConversationFilterState] =
|
||||||
useState<KnowledgeFilter | null>(null);
|
useState<KnowledgeFilter | null>(null);
|
||||||
|
const [hasChatError, setChatError] = useState(false);
|
||||||
|
|
||||||
// Debounce refresh requests to prevent excessive reloads
|
// Debounce refresh requests to prevent excessive reloads
|
||||||
const refreshTimeoutRef = useRef<NodeJS.Timeout | null>(null);
|
const refreshTimeoutRef = useRef<NodeJS.Timeout | null>(null);
|
||||||
|
|
@ -358,6 +361,8 @@ export function ChatProvider({ children }: ChatProviderProps) {
|
||||||
setConversationLoaded,
|
setConversationLoaded,
|
||||||
conversationFilter,
|
conversationFilter,
|
||||||
setConversationFilter,
|
setConversationFilter,
|
||||||
|
hasChatError,
|
||||||
|
setChatError,
|
||||||
}),
|
}),
|
||||||
[
|
[
|
||||||
endpoint,
|
endpoint,
|
||||||
|
|
@ -378,6 +383,7 @@ export function ChatProvider({ children }: ChatProviderProps) {
|
||||||
conversationLoaded,
|
conversationLoaded,
|
||||||
conversationFilter,
|
conversationFilter,
|
||||||
setConversationFilter,
|
setConversationFilter,
|
||||||
|
hasChatError,
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue