From c295431484005b385ff611d9e481bd4514bc4f96 Mon Sep 17 00:00:00 2001 From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com> Date: Fri, 14 Nov 2025 18:09:47 -0300 Subject: [PATCH] fix: refactor models validation to fix bugs related to ollama, watsonx and openai (#406) * Fixed models service to try api key with first available model * fixed ibm onboarding to not disable query when no data is available * make ibm query disabled when not configured * enable ollama query only when configured or endpoint present * enable get openai models query when already configured * just enable get from env when not configured * Simplify ollama models validation * fix max_tokens error on gpt 4o --- .../onboarding/components/ibm-onboarding.tsx | 10 +-- .../components/ollama-onboarding.tsx | 1 + .../components/openai-onboarding.tsx | 4 +- src/api/provider_validation.py | 16 +++- src/services/models_service.py | 80 +++++++++---------- 5 files changed, 60 insertions(+), 51 deletions(-) diff --git a/frontend/src/app/onboarding/components/ibm-onboarding.tsx b/frontend/src/app/onboarding/components/ibm-onboarding.tsx index 3d480248..3bb830b6 100644 --- a/frontend/src/app/onboarding/components/ibm-onboarding.tsx +++ b/frontend/src/app/onboarding/components/ibm-onboarding.tsx @@ -73,13 +73,11 @@ export function IBMOnboarding({ error: modelsError, } = useGetIBMModelsQuery( { - endpoint: debouncedEndpoint, - apiKey: debouncedApiKey, - projectId: debouncedProjectId, - }, - { - enabled: !!debouncedEndpoint && !!debouncedApiKey && !!debouncedProjectId, + endpoint: debouncedEndpoint ? debouncedEndpoint : undefined, + apiKey: debouncedApiKey ? debouncedApiKey : undefined, + projectId: debouncedProjectId ? debouncedProjectId : undefined, }, + { enabled: !!debouncedEndpoint || !!debouncedApiKey || !!debouncedProjectId || alreadyConfigured }, ); // Use custom hook for model selection logic diff --git a/frontend/src/app/onboarding/components/ollama-onboarding.tsx b/frontend/src/app/onboarding/components/ollama-onboarding.tsx index 99c26d2a..e85366ba 100644 --- a/frontend/src/app/onboarding/components/ollama-onboarding.tsx +++ b/frontend/src/app/onboarding/components/ollama-onboarding.tsx @@ -36,6 +36,7 @@ export function OllamaOnboarding({ error: modelsError, } = useGetOllamaModelsQuery( debouncedEndpoint ? { endpoint: debouncedEndpoint } : undefined, + { enabled: !!debouncedEndpoint || alreadyConfigured }, ); // Use custom hook for model selection logic diff --git a/frontend/src/app/onboarding/components/openai-onboarding.tsx b/frontend/src/app/onboarding/components/openai-onboarding.tsx index d4fc73a4..47c427a9 100644 --- a/frontend/src/app/onboarding/components/openai-onboarding.tsx +++ b/frontend/src/app/onboarding/components/openai-onboarding.tsx @@ -34,7 +34,7 @@ export function OpenAIOnboarding({ alreadyConfigured?: boolean; }) { const [apiKey, setApiKey] = useState(""); - const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey); + const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey && !alreadyConfigured); const debouncedApiKey = useDebouncedValue(apiKey, 500); // Fetch models from API when API key is provided @@ -48,7 +48,7 @@ export function OpenAIOnboarding({ : debouncedApiKey ? { apiKey: debouncedApiKey } : undefined, - { enabled: debouncedApiKey !== "" || getFromEnv }, + { enabled: debouncedApiKey !== "" || getFromEnv || alreadyConfigured }, ); // Use custom hook for model selection logic const { diff --git a/src/api/provider_validation.py b/src/api/provider_validation.py index e51cc3bc..2fcc1e65 100644 --- a/src/api/provider_validation.py +++ b/src/api/provider_validation.py @@ -112,7 +112,7 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No } # Simple tool calling test - payload = { + base_payload = { "model": llm_model, "messages": [ {"role": "user", "content": "What tools do you have available?"} @@ -136,10 +136,11 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No } } ], - "max_tokens": 50, } async with httpx.AsyncClient() as client: + # Try with max_tokens first + payload = {**base_payload, "max_tokens": 50} response = await client.post( "https://api.openai.com/v1/chat/completions", headers=headers, @@ -147,6 +148,17 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No timeout=30.0, ) + # If max_tokens doesn't work, try with max_completion_tokens + if response.status_code != 200: + logger.info("max_tokens parameter failed, trying max_completion_tokens instead") + payload = {**base_payload, "max_completion_tokens": 50} + response = await client.post( + "https://api.openai.com/v1/chat/completions", + headers=headers, + json=payload, + timeout=30.0, + ) + if response.status_code != 200: logger.error(f"OpenAI completion test failed: {response.status_code} - {response.text}") raise Exception(f"OpenAI API error: {response.status_code}") diff --git a/src/services/models_service.py b/src/services/models_service.py index 28dee73a..f26d0594 100644 --- a/src/services/models_service.py +++ b/src/services/models_service.py @@ -1,6 +1,5 @@ import httpx from typing import Dict, List -from api.provider_validation import test_embedding from utils.container_utils import transform_localhost_url from utils.logging_config import get_logger @@ -229,20 +228,14 @@ class ModelsService: f"Model: {model_name}, Capabilities: {capabilities}" ) - # Check if model has required capabilities + # Check if model has embedding capability + has_embedding = "embedding" in capabilities + # Check if model has required capabilities for language models has_completion = DESIRED_CAPABILITY in capabilities has_tools = TOOL_CALLING_CAPABILITY in capabilities - # Check if it's an embedding model - try: - await test_embedding("ollama", endpoint=endpoint, embedding_model=model_name) - is_embedding = True - except Exception as e: - logger.warning(f"Failed to test embedding for model {model_name}: {str(e)}") - is_embedding = False - - if is_embedding: - # Embedding models only need completion capability + if has_embedding: + # Embedding models have embedding capability embedding_models.append( { "value": model_name, @@ -250,7 +243,7 @@ class ModelsService: "default": "nomic-embed-text" in model_name.lower(), } ) - elif not is_embedding and has_completion and has_tools: + if has_completion and has_tools: # Language models need both completion and tool calling language_models.append( { @@ -333,34 +326,6 @@ class ModelsService: if project_id: headers["Project-ID"] = project_id - # Validate credentials with a minimal completion request - async with httpx.AsyncClient() as client: - validation_url = f"{watson_endpoint}/ml/v1/text/generation" - validation_params = {"version": "2024-09-16"} - validation_payload = { - "input": "test", - "model_id": "ibm/granite-3-2b-instruct", - "project_id": project_id, - "parameters": { - "max_new_tokens": 1, - }, - } - - validation_response = await client.post( - validation_url, - headers=headers, - params=validation_params, - json=validation_payload, - timeout=10.0, - ) - - if validation_response.status_code != 200: - raise Exception( - f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}" - ) - - logger.info("IBM Watson credentials validated successfully") - # Fetch foundation models using the correct endpoint models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs" @@ -424,6 +389,39 @@ class ModelsService: } ) + # Validate credentials with the first available LLM model + if language_models: + first_llm_model = language_models[0]["value"] + + async with httpx.AsyncClient() as client: + validation_url = f"{watson_endpoint}/ml/v1/text/generation" + validation_params = {"version": "2024-09-16"} + validation_payload = { + "input": "test", + "model_id": first_llm_model, + "project_id": project_id, + "parameters": { + "max_new_tokens": 1, + }, + } + + validation_response = await client.post( + validation_url, + headers=headers, + params=validation_params, + json=validation_payload, + timeout=10.0, + ) + + if validation_response.status_code != 200: + raise Exception( + f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}" + ) + + logger.info(f"IBM Watson credentials validated successfully using model: {first_llm_model}") + else: + logger.warning("No language models available to validate credentials") + if not language_models and not embedding_models: raise Exception("No IBM models retrieved from API")