Merge pull request #574 from langflow-ai/feat-ollama-health-checks
This commit is contained in:
commit
12a13f1d2c
3 changed files with 117 additions and 23 deletions
|
|
@ -26,6 +26,9 @@ export interface ProviderHealthParams {
|
||||||
provider?: "openai" | "ollama" | "watsonx";
|
provider?: "openai" | "ollama" | "watsonx";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track consecutive failures for exponential backoff
|
||||||
|
const failureCountMap = new Map<string, number>();
|
||||||
|
|
||||||
export const useProviderHealthQuery = (
|
export const useProviderHealthQuery = (
|
||||||
params?: ProviderHealthParams,
|
params?: ProviderHealthParams,
|
||||||
options?: Omit<
|
options?: Omit<
|
||||||
|
|
@ -87,18 +90,42 @@ export const useProviderHealthQuery = (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const queryKey = ["provider", "health"];
|
||||||
|
const failureCountKey = queryKey.join("-");
|
||||||
|
|
||||||
const queryResult = useQuery(
|
const queryResult = useQuery(
|
||||||
{
|
{
|
||||||
queryKey: ["provider", "health"],
|
queryKey,
|
||||||
queryFn: checkProviderHealth,
|
queryFn: checkProviderHealth,
|
||||||
retry: false, // Don't retry health checks automatically
|
retry: false, // Don't retry health checks automatically
|
||||||
refetchInterval: (query) => {
|
refetchInterval: (query) => {
|
||||||
// If healthy, check every 30 seconds; otherwise check every 3 seconds
|
const data = query.state.data;
|
||||||
return query.state.data?.status === "healthy" ? 30000 : 3000;
|
const status = data?.status;
|
||||||
|
|
||||||
|
// If healthy, reset failure count and check every 30 seconds
|
||||||
|
if (status === "healthy") {
|
||||||
|
failureCountMap.set(failureCountKey, 0);
|
||||||
|
return 30000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If backend unavailable, use moderate polling
|
||||||
|
if (status === "backend-unavailable") {
|
||||||
|
return 15000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For unhealthy/error status, use exponential backoff
|
||||||
|
const currentFailures = failureCountMap.get(failureCountKey) || 0;
|
||||||
|
failureCountMap.set(failureCountKey, currentFailures + 1);
|
||||||
|
|
||||||
|
// Exponential backoff: 5s, 10s, 20s, then 30s
|
||||||
|
const backoffDelays = [5000, 10000, 20000, 30000];
|
||||||
|
const delay = backoffDelays[Math.min(currentFailures, backoffDelays.length - 1)];
|
||||||
|
|
||||||
|
return delay;
|
||||||
},
|
},
|
||||||
refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
|
refetchOnWindowFocus: false, // Disabled to reduce unnecessary calls on tab switches
|
||||||
refetchOnMount: true,
|
refetchOnMount: true,
|
||||||
staleTime: 30000, // Consider data fresh for 30 seconds
|
staleTime: 30000, // Consider data stale after 30 seconds
|
||||||
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
|
enabled: !!settings?.edited && options?.enabled !== false, // Only run after onboarding is complete
|
||||||
...options,
|
...options,
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,10 @@
|
||||||
"""Provider health check endpoint."""
|
"""Provider health check endpoint."""
|
||||||
|
|
||||||
|
import httpx
|
||||||
from starlette.responses import JSONResponse
|
from starlette.responses import JSONResponse
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
from config.settings import get_openrag_config
|
from config.settings import get_openrag_config
|
||||||
from api.provider_validation import validate_provider_setup
|
from api.provider_validation import validate_provider_setup, _test_ollama_lightweight_health
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
@ -116,31 +117,69 @@ async def check_provider_health(request):
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Validate both LLM and embedding providers
|
# Validate both LLM and embedding providers
|
||||||
|
# Note: For Ollama, we use lightweight checks that don't require model inference.
|
||||||
|
# This prevents false-positive errors when Ollama is busy processing other requests.
|
||||||
llm_error = None
|
llm_error = None
|
||||||
embedding_error = None
|
embedding_error = None
|
||||||
|
|
||||||
# Validate LLM provider
|
# Validate LLM provider
|
||||||
try:
|
try:
|
||||||
await validate_provider_setup(
|
# For Ollama, use lightweight health check that doesn't block on active requests
|
||||||
provider=provider,
|
if provider == "ollama":
|
||||||
api_key=api_key,
|
try:
|
||||||
llm_model=llm_model,
|
await _test_ollama_lightweight_health(endpoint)
|
||||||
endpoint=endpoint,
|
except Exception as lightweight_error:
|
||||||
project_id=project_id,
|
# If lightweight check fails, Ollama is down or misconfigured
|
||||||
)
|
llm_error = str(lightweight_error)
|
||||||
|
logger.error(f"LLM provider ({provider}) lightweight check failed: {llm_error}")
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
await validate_provider_setup(
|
||||||
|
provider=provider,
|
||||||
|
api_key=api_key,
|
||||||
|
llm_model=llm_model,
|
||||||
|
endpoint=endpoint,
|
||||||
|
project_id=project_id,
|
||||||
|
)
|
||||||
|
except httpx.TimeoutException as e:
|
||||||
|
# Timeout means provider is busy, not misconfigured
|
||||||
|
if provider == "ollama":
|
||||||
|
llm_error = None # Don't treat as error
|
||||||
|
logger.info(f"LLM provider ({provider}) appears busy: {str(e)}")
|
||||||
|
else:
|
||||||
|
llm_error = str(e)
|
||||||
|
logger.error(f"LLM provider ({provider}) validation timed out: {llm_error}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
llm_error = str(e)
|
llm_error = str(e)
|
||||||
logger.error(f"LLM provider ({provider}) validation failed: {llm_error}")
|
logger.error(f"LLM provider ({provider}) validation failed: {llm_error}")
|
||||||
|
|
||||||
# Validate embedding provider
|
# Validate embedding provider
|
||||||
try:
|
try:
|
||||||
await validate_provider_setup(
|
# For Ollama, use lightweight health check first
|
||||||
provider=embedding_provider,
|
if embedding_provider == "ollama":
|
||||||
api_key=embedding_api_key,
|
try:
|
||||||
embedding_model=embedding_model,
|
await _test_ollama_lightweight_health(embedding_endpoint)
|
||||||
endpoint=embedding_endpoint,
|
except Exception as lightweight_error:
|
||||||
project_id=embedding_project_id,
|
# If lightweight check fails, Ollama is down or misconfigured
|
||||||
)
|
embedding_error = str(lightweight_error)
|
||||||
|
logger.error(f"Embedding provider ({embedding_provider}) lightweight check failed: {embedding_error}")
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
await validate_provider_setup(
|
||||||
|
provider=embedding_provider,
|
||||||
|
api_key=embedding_api_key,
|
||||||
|
embedding_model=embedding_model,
|
||||||
|
endpoint=embedding_endpoint,
|
||||||
|
project_id=embedding_project_id,
|
||||||
|
)
|
||||||
|
except httpx.TimeoutException as e:
|
||||||
|
# Timeout means provider is busy, not misconfigured
|
||||||
|
if embedding_provider == "ollama":
|
||||||
|
embedding_error = None # Don't treat as error
|
||||||
|
logger.info(f"Embedding provider ({embedding_provider}) appears busy: {str(e)}")
|
||||||
|
else:
|
||||||
|
embedding_error = str(e)
|
||||||
|
logger.error(f"Embedding provider ({embedding_provider}) validation timed out: {embedding_error}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
embedding_error = str(e)
|
embedding_error = str(e)
|
||||||
logger.error(f"Embedding provider ({embedding_provider}) validation failed: {embedding_error}")
|
logger.error(f"Embedding provider ({embedding_provider}) validation failed: {embedding_error}")
|
||||||
|
|
|
||||||
|
|
@ -364,6 +364,34 @@ async def _test_watsonx_embedding(
|
||||||
|
|
||||||
|
|
||||||
# Ollama validation functions
|
# Ollama validation functions
|
||||||
|
async def _test_ollama_lightweight_health(endpoint: str) -> None:
    """Test Ollama availability with a lightweight status check.

    Sends a single GET to the endpoint (after passing it through
    ``transform_localhost_url``) and only checks that the response status is
    200. The response body is not inspected.

    Args:
        endpoint: Base URL of the Ollama server.

    Raises:
        Exception: If the request times out ("Ollama endpoint timed out"), if
            the response status is not 200, or if the request fails for any
            other reason (the underlying error is re-raised unchanged).
    """
    try:
        ollama_url = transform_localhost_url(endpoint)

        async with httpx.AsyncClient() as client:
            response = await client.get(
                ollama_url,
                timeout=10.0,  # Short timeout for lightweight check
            )
    except httpx.TimeoutException as exc:
        logger.error("Ollama lightweight health check timed out")
        raise Exception("Ollama endpoint timed out") from exc
    except Exception as e:
        logger.error(f"Ollama lightweight health check failed: {str(e)}")
        raise

    # Evaluate the status outside the try block: raising from inside it would
    # be caught by the generic handler above and log the same failure twice.
    if response.status_code != 200:
        logger.error(f"Ollama lightweight health check failed: {response.status_code}")
        raise Exception(f"Ollama endpoint not responding: {response.status_code}")

    logger.info("Ollama lightweight health check passed")
|
||||||
|
|
||||||
|
|
||||||
async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> None:
|
async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> None:
|
||||||
"""Test Ollama completion with tool calling."""
|
"""Test Ollama completion with tool calling."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -401,7 +429,7 @@ async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> N
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
url,
|
url,
|
||||||
json=payload,
|
json=payload,
|
||||||
timeout=30.0,
|
timeout=60.0, # Increased timeout for Ollama when potentially busy
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
|
|
@ -412,7 +440,7 @@ async def _test_ollama_completion_with_tools(llm_model: str, endpoint: str) -> N
|
||||||
|
|
||||||
except httpx.TimeoutException:
|
except httpx.TimeoutException:
|
||||||
logger.error("Ollama completion test timed out")
|
logger.error("Ollama completion test timed out")
|
||||||
raise Exception("Request timed out")
|
raise httpx.TimeoutException("Ollama is busy or model inference timed out")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Ollama completion test failed: {str(e)}")
|
logger.error(f"Ollama completion test failed: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
@ -433,7 +461,7 @@ async def _test_ollama_embedding(embedding_model: str, endpoint: str) -> None:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
url,
|
url,
|
||||||
json=payload,
|
json=payload,
|
||||||
timeout=30.0,
|
timeout=60.0, # Increased timeout for Ollama when potentially busy
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
|
|
@ -448,7 +476,7 @@ async def _test_ollama_embedding(embedding_model: str, endpoint: str) -> None:
|
||||||
|
|
||||||
except httpx.TimeoutException:
|
except httpx.TimeoutException:
|
||||||
logger.error("Ollama embedding test timed out")
|
logger.error("Ollama embedding test timed out")
|
||||||
raise Exception("Request timed out")
|
raise httpx.TimeoutException("Ollama is busy or embedding generation timed out")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Ollama embedding test failed: {str(e)}")
|
logger.error(f"Ollama embedding test failed: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue