From a6f1ed7c48db5dbc51757eed750047399517ef01 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 2 Dec 2025 11:16:34 -0800 Subject: [PATCH] feat: Provide ollama health checking --- .../app/api/queries/useProviderHealthQuery.ts | 35 ++++++++-- src/api/provider_health.py | 69 +++++++++++++++---- src/api/provider_validation.py | 38 ++++++++-- 3 files changed, 119 insertions(+), 23 deletions(-) diff --git a/frontend/app/api/queries/useProviderHealthQuery.ts b/frontend/app/api/queries/useProviderHealthQuery.ts index 82ca2db2..6a52c24f 100644 --- a/frontend/app/api/queries/useProviderHealthQuery.ts +++ b/frontend/app/api/queries/useProviderHealthQuery.ts @@ -26,6 +26,9 @@ export interface ProviderHealthParams { provider?: "openai" | "ollama" | "watsonx"; } +// Track consecutive failures for exponential backoff +const failureCountMap = new Map<string, number>(); + export const useProviderHealthQuery = ( params?: ProviderHealthParams, options?: Omit< @@ -87,18 +90,42 @@ export const useProviderHealthQuery = ( } } + const queryKey = ["provider", "health"]; + const failureCountKey = queryKey.join("-"); + const queryResult = useQuery( { - queryKey: ["provider", "health"], + queryKey, queryFn: checkProviderHealth, retry: false, // Don't retry health checks automatically refetchInterval: (query) => { - // If healthy, check every 30 seconds; otherwise check every 3 seconds - return query.state.data?.status === "healthy" ? 
30000 : 3000; + const data = query.state.data; + const status = data?.status; + + // If healthy, reset failure count and check every 30 seconds + if (status === "healthy") { + failureCountMap.set(failureCountKey, 0); + return 30000; + } + + // If backend unavailable, use moderate polling + if (status === "backend-unavailable") { + return 15000; + } + + // For unhealthy/error status, use exponential backoff + const currentFailures = failureCountMap.get(failureCountKey) || 0; + failureCountMap.set(failureCountKey, currentFailures + 1); + + // Exponential backoff: 5s, 10s, 20s, then 30s + const backoffDelays = [5000, 10000, 20000, 30000]; + const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)]; + + return delay; }, refetchOnWindowFocus: true, refetchOnMount: true, - staleTime: 30000, // Consider data stale after 25 seconds + staleTime: 30000, // Consider data stale after 30 seconds enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete ...options, }, diff --git a/src/api/provider_health.py b/src/api/provider_health.py index da7c7371..67b6e765 100644 --- a/src/api/provider_health.py +++ b/src/api/provider_health.py @@ -1,9 +1,10 @@ """Provider health check endpoint.""" +import httpx from starlette.responses import JSONResponse from utils.logging_config import get_logger from config.settings import get_openrag_config -from api.provider_validation import validate_provider_setup +from api.provider_validation import validate_provider_setup, _test_ollama_lightweight_health logger = get_logger(__name__) @@ -116,31 +117,69 @@ async def check_provider_health(request): ) else: # Validate both LLM and embedding providers + # Note: For Ollama, we use lightweight checks that don't require model inference. + # This prevents false-positive errors when Ollama is busy processing other requests. 
llm_error = None embedding_error = None # Validate LLM provider try: - await validate_provider_setup( - provider=provider, - api_key=api_key, - llm_model=llm_model, - endpoint=endpoint, - project_id=project_id, - ) + # For Ollama, use lightweight health check that doesn't block on active requests + if provider == "ollama": + try: + await _test_ollama_lightweight_health(endpoint) + except Exception as lightweight_error: + # If lightweight check fails, Ollama is down or misconfigured + llm_error = str(lightweight_error) + logger.error(f"LLM provider ({provider}) lightweight check failed: {llm_error}") + raise + else: + await validate_provider_setup( + provider=provider, + api_key=api_key, + llm_model=llm_model, + endpoint=endpoint, + project_id=project_id, + ) + except httpx.TimeoutException as e: + # Timeout means provider is busy, not misconfigured + if provider == "ollama": + llm_error = None # Don't treat as error + logger.info(f"LLM provider ({provider}) appears busy: {str(e)}") + else: + llm_error = str(e) + logger.error(f"LLM provider ({provider}) validation timed out: {llm_error}") except Exception as e: llm_error = str(e) logger.error(f"LLM provider ({provider}) validation failed: {llm_error}") # Validate embedding provider try: - await validate_provider_setup( - provider=embedding_provider, - api_key=embedding_api_key, - embedding_model=embedding_model, - endpoint=embedding_endpoint, - project_id=embedding_project_id, - ) + # For Ollama, use lightweight health check first + if embedding_provider == "ollama": + try: + await _test_ollama_lightweight_health(embedding_endpoint) + except Exception as lightweight_error: + # If lightweight check fails, Ollama is down or misconfigured + embedding_error = str(lightweight_error) + logger.error(f"Embedding provider ({embedding_provider}) lightweight check failed: {embedding_error}") + raise + else: + await validate_provider_setup( + provider=embedding_provider, + api_key=embedding_api_key, + 
embedding_model=embedding_model, + endpoint=embedding_endpoint, + project_id=embedding_project_id, + ) + except httpx.TimeoutException as e: + # Timeout means provider is busy, not misconfigured + if embedding_provider == "ollama": + embedding_error = None # Don't treat as error + logger.info(f"Embedding provider ({embedding_provider}) appears busy: {str(e)}") + else: + embedding_error = str(e) + logger.error(f"Embedding provider ({embedding_provider}) validation timed out: {embedding_error}") except Exception as e: embedding_error = str(e) logger.error(f"Embedding provider ({embedding_provider}) validation failed: {embedding_error}") diff --git a/src/api/provider_validation.py b/src/api/provider_validation.py index 2fcc1e65..cb3d571a 100644 --- a/src/api/provider_validation.py +++ b/src/api/provider_validation.py @@ -364,6 +364,36 @@ async def _test_watsonx_embedding( # Ollama validation functions +async def _test_ollama_lightweight_health(endpoint: str) -> None: + """Test Ollama availability with lightweight /api/tags endpoint. + + This endpoint is very fast and doesn't block on active requests, + making it ideal for health checks when Ollama might be busy. 
+ """ + try: + ollama_url = transform_localhost_url(endpoint) + url = f"{ollama_url}/api/tags" + + async with httpx.AsyncClient() as client: + response = await client.get( + url, + timeout=10.0, # Short timeout for lightweight check + ) + + if response.status_code != 200: + logger.error(f"Ollama lightweight health check failed: {response.status_code}") + raise Exception(f"Ollama endpoint not responding: {response.status_code}") + + logger.info("Ollama lightweight health check passed") + + except httpx.TimeoutException: + logger.error("Ollama lightweight health check timed out") + raise Exception("Ollama endpoint timed out") + except Exception as e: + logger.error(f"Ollama lightweight health check failed: {str(e)}") + raise + + async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> None: """Test Ollama completion with tool calling.""" try: @@ -401,7 +431,7 @@ async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> N response = await client.post( url, json=payload, - timeout=30.0, + timeout=120.0, # Increased timeout for Ollama when potentially busy ) if response.status_code != 200: @@ -412,7 +442,7 @@ async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> N except httpx.TimeoutException: logger.error("Ollama completion test timed out") - raise Exception("Request timed out") + raise httpx.TimeoutException("Ollama is busy or model inference timed out") except Exception as e: logger.error(f"Ollama completion test failed: {str(e)}") raise @@ -433,7 +463,7 @@ async def _test_ollama_embedding(embedding_model: str, endpoint: str) -> None: response = await client.post( url, json=payload, - timeout=30.0, + timeout=120.0, # Increased timeout for Ollama when potentially busy ) if response.status_code != 200: @@ -448,7 +478,7 @@ async def _test_ollama_embedding(embedding_model: str, endpoint: str) -> None: except httpx.TimeoutException: logger.error("Ollama embedding test timed out") - raise 
Exception("Request timed out") + raise httpx.TimeoutException("Ollama is busy or embedding generation timed out") except Exception as e: logger.error(f"Ollama embedding test failed: {str(e)}") raise