feat: Provide ollama health checking

This commit is contained in:
Eric Hare 2025-12-02 11:16:34 -08:00
parent 3fbf19aaec
commit a6f1ed7c48
No known key found for this signature in database
GPG key ID: A73DF73724270AB7
3 changed files with 119 additions and 23 deletions

View file

@ -26,6 +26,9 @@ export interface ProviderHealthParams {
provider?: "openai" | "ollama" | "watsonx";
}
// Track consecutive failures for exponential backoff
const failureCountMap = new Map<string, number>();
export const useProviderHealthQuery = (
params?: ProviderHealthParams,
options?: Omit<
@ -87,18 +90,42 @@ export const useProviderHealthQuery = (
}
}
const queryKey = ["provider", "health"];
const failureCountKey = queryKey.join("-");
const queryResult = useQuery(
{
queryKey: ["provider", "health"],
queryKey,
queryFn: checkProviderHealth,
retry: false, // Don't retry health checks automatically
refetchInterval: (query) => {
// If healthy, check every 30 seconds; otherwise check every 3 seconds
return query.state.data?.status === "healthy" ? 30000 : 3000;
const data = query.state.data;
const status = data?.status;
// If healthy, reset failure count and check every 30 seconds
if (status === "healthy") {
failureCountMap.set(failureCountKey, 0);
return 30000;
}
// If backend unavailable, use moderate polling
if (status === "backend-unavailable") {
return 15000;
}
// For unhealthy/error status, use exponential backoff
const currentFailures = failureCountMap.get(failureCountKey) || 0;
failureCountMap.set(failureCountKey, currentFailures + 1);
// Exponential backoff: 5s, 10s, 20s, then 30s
const backoffDelays = [5000, 10000, 20000, 30000];
const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
return delay;
},
refetchOnWindowFocus: true,
refetchOnMount: true,
staleTime: 30000, // Consider data stale after 25 seconds
staleTime: 30000, // Consider data stale after 30 seconds
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
...options,
},

View file

@ -1,9 +1,10 @@
"""Provider health check endpoint."""
import httpx
from starlette.responses import JSONResponse
from utils.logging_config import get_logger
from config.settings import get_openrag_config
from api.provider_validation import validate_provider_setup
from api.provider_validation import validate_provider_setup, _test_ollama_lightweight_health
logger = get_logger(__name__)
@ -116,31 +117,69 @@ async def check_provider_health(request):
)
else:
# Validate both LLM and embedding providers
# Note: For Ollama, we use lightweight checks that don't require model inference.
# This prevents false-positive errors when Ollama is busy processing other requests.
llm_error = None
embedding_error = None
# Validate LLM provider
try:
await validate_provider_setup(
provider=provider,
api_key=api_key,
llm_model=llm_model,
endpoint=endpoint,
project_id=project_id,
)
# For Ollama, use lightweight health check that doesn't block on active requests
if provider == "ollama":
try:
await _test_ollama_lightweight_health(endpoint)
except Exception as lightweight_error:
# If lightweight check fails, Ollama is down or misconfigured
llm_error = str(lightweight_error)
logger.error(f"LLM provider ({provider}) lightweight check failed: {llm_error}")
raise
else:
await validate_provider_setup(
provider=provider,
api_key=api_key,
llm_model=llm_model,
endpoint=endpoint,
project_id=project_id,
)
except httpx.TimeoutException as e:
# Timeout means provider is busy, not misconfigured
if provider == "ollama":
llm_error = None # Don't treat as error
logger.info(f"LLM provider ({provider}) appears busy: {str(e)}")
else:
llm_error = str(e)
logger.error(f"LLM provider ({provider}) validation timed out: {llm_error}")
except Exception as e:
llm_error = str(e)
logger.error(f"LLM provider ({provider}) validation failed: {llm_error}")
# Validate embedding provider
try:
await validate_provider_setup(
provider=embedding_provider,
api_key=embedding_api_key,
embedding_model=embedding_model,
endpoint=embedding_endpoint,
project_id=embedding_project_id,
)
# For Ollama, use lightweight health check first
if embedding_provider == "ollama":
try:
await _test_ollama_lightweight_health(embedding_endpoint)
except Exception as lightweight_error:
# If lightweight check fails, Ollama is down or misconfigured
embedding_error = str(lightweight_error)
logger.error(f"Embedding provider ({embedding_provider}) lightweight check failed: {embedding_error}")
raise
else:
await validate_provider_setup(
provider=embedding_provider,
api_key=embedding_api_key,
embedding_model=embedding_model,
endpoint=embedding_endpoint,
project_id=embedding_project_id,
)
except httpx.TimeoutException as e:
# Timeout means provider is busy, not misconfigured
if embedding_provider == "ollama":
embedding_error = None # Don't treat as error
logger.info(f"Embedding provider ({embedding_provider}) appears busy: {str(e)}")
else:
embedding_error = str(e)
logger.error(f"Embedding provider ({embedding_provider}) validation timed out: {embedding_error}")
except Exception as e:
embedding_error = str(e)
logger.error(f"Embedding provider ({embedding_provider}) validation failed: {embedding_error}")

View file

@ -364,6 +364,36 @@ async def _test_watsonx_embedding(
# Ollama validation functions
async def _test_ollama_lightweight_health(endpoint: str) -> None:
    """Test Ollama availability with the lightweight /api/tags endpoint.

    This endpoint is very fast and doesn't block on active requests,
    making it ideal for health checks when Ollama might be busy.

    Args:
        endpoint: Base URL of the Ollama server; localhost-style URLs are
            rewritten via ``transform_localhost_url`` before the request.

    Raises:
        Exception: If the request times out, the endpoint returns a
            non-200 status, or any other request error occurs.
    """
    try:
        ollama_url = transform_localhost_url(endpoint)
        url = f"{ollama_url}/api/tags"
        async with httpx.AsyncClient() as client:
            response = await client.get(
                url,
                timeout=10.0,  # Short timeout for lightweight check
            )
        # Reachable but unhealthy/misconfigured; raise so the blanket
        # handler below logs it exactly once (the original logged this
        # failure twice: once here and again in the generic handler).
        if response.status_code != 200:
            raise Exception(f"Ollama endpoint not responding: {response.status_code}")
        logger.info("Ollama lightweight health check passed")
    except httpx.TimeoutException:
        logger.error("Ollama lightweight health check timed out")
        raise Exception("Ollama endpoint timed out")
    except Exception as e:
        logger.error(f"Ollama lightweight health check failed: {str(e)}")
        raise
async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> None:
"""Test Ollama completion with tool calling."""
try:
@ -401,7 +431,7 @@ async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> N
response = await client.post(
url,
json=payload,
timeout=30.0,
timeout=120.0, # Increased timeout for Ollama when potentially busy
)
if response.status_code != 200:
@ -412,7 +442,7 @@ async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> N
except httpx.TimeoutException:
logger.error("Ollama completion test timed out")
raise Exception("Request timed out")
raise httpx.TimeoutException("Ollama is busy or model inference timed out")
except Exception as e:
logger.error(f"Ollama completion test failed: {str(e)}")
raise
@ -433,7 +463,7 @@ async def _test_ollama_embedding(embedding_model: str, endpoint: str) -> None:
response = await client.post(
url,
json=payload,
timeout=30.0,
timeout=120.0, # Increased timeout for Ollama when potentially busy
)
if response.status_code != 200:
@ -448,7 +478,7 @@ async def _test_ollama_embedding(embedding_model: str, endpoint: str) -> None:
except httpx.TimeoutException:
logger.error("Ollama embedding test timed out")
raise Exception("Request timed out")
raise httpx.TimeoutException("Ollama is busy or embedding generation timed out")
except Exception as e:
logger.error(f"Ollama embedding test failed: {str(e)}")
raise