From c295431484005b385ff611d9e481bd4514bc4f96 Mon Sep 17 00:00:00 2001
From: Lucas Oliveira <62335616+lucaseduoli@users.noreply.github.com>
Date: Fri, 14 Nov 2025 18:09:47 -0300
Subject: [PATCH] fix: refactor models validation to fix bugs related to
 ollama, watsonx and openai (#406)

* Fixed models service to try api key with first available model

* Fixed IBM onboarding to not disable the query when no data is available

* Disable the IBM query when not configured

* Enable the Ollama query only when configured or when an endpoint is present

* Enable the OpenAI models query when already configured

* Only enable "get from env" when not already configured

* Simplify Ollama models validation

* Fix max_tokens error on GPT-4o
---
 .../onboarding/components/ibm-onboarding.tsx  | 10 +--
 .../components/ollama-onboarding.tsx          |  1 +
 .../components/openai-onboarding.tsx          |  4 +-
 src/api/provider_validation.py                | 16 +++-
 src/services/models_service.py                | 80 +++++++++----------
 5 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/frontend/src/app/onboarding/components/ibm-onboarding.tsx b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
index 3d480248..3bb830b6 100644
--- a/frontend/src/app/onboarding/components/ibm-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ibm-onboarding.tsx
@@ -73,13 +73,11 @@ export function IBMOnboarding({
     error: modelsError,
   } = useGetIBMModelsQuery(
     {
-      endpoint: debouncedEndpoint,
-      apiKey: debouncedApiKey,
-      projectId: debouncedProjectId,
-    },
-    {
-      enabled: !!debouncedEndpoint && !!debouncedApiKey && !!debouncedProjectId,
+      endpoint: debouncedEndpoint ? debouncedEndpoint : undefined,
+      apiKey: debouncedApiKey ? debouncedApiKey : undefined,
+      projectId: debouncedProjectId ? debouncedProjectId : undefined,
     },
+    { enabled: !!debouncedEndpoint || !!debouncedApiKey || !!debouncedProjectId || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
diff --git a/frontend/src/app/onboarding/components/ollama-onboarding.tsx b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
index 99c26d2a..e85366ba 100644
--- a/frontend/src/app/onboarding/components/ollama-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/ollama-onboarding.tsx
@@ -36,6 +36,7 @@ export function OllamaOnboarding({
     error: modelsError,
   } = useGetOllamaModelsQuery(
     debouncedEndpoint ? { endpoint: debouncedEndpoint } : undefined,
+    { enabled: !!debouncedEndpoint || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
diff --git a/frontend/src/app/onboarding/components/openai-onboarding.tsx b/frontend/src/app/onboarding/components/openai-onboarding.tsx
index d4fc73a4..47c427a9 100644
--- a/frontend/src/app/onboarding/components/openai-onboarding.tsx
+++ b/frontend/src/app/onboarding/components/openai-onboarding.tsx
@@ -34,7 +34,7 @@ export function OpenAIOnboarding({
   alreadyConfigured?: boolean;
 }) {
   const [apiKey, setApiKey] = useState("");
-  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey);
+  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey && !alreadyConfigured);
   const debouncedApiKey = useDebouncedValue(apiKey, 500);
 
   // Fetch models from API when API key is provided
@@ -48,7 +48,7 @@ export function OpenAIOnboarding({
       : debouncedApiKey
         ? { apiKey: debouncedApiKey }
         : undefined,
-    { enabled: debouncedApiKey !== "" || getFromEnv },
+    { enabled: debouncedApiKey !== "" || getFromEnv || alreadyConfigured },
   );
   // Use custom hook for model selection logic
   const {
diff --git a/src/api/provider_validation.py b/src/api/provider_validation.py
index e51cc3bc..2fcc1e65 100644
--- a/src/api/provider_validation.py
+++ b/src/api/provider_validation.py
@@ -112,7 +112,7 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
         }
 
         # Simple tool calling test
-        payload = {
+        base_payload = {
             "model": llm_model,
             "messages": [
                 {"role": "user", "content": "What tools do you have available?"}
@@ -136,10 +136,11 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
                     }
                 }
             ],
-            "max_tokens": 50,
         }
 
         async with httpx.AsyncClient() as client:
+            # Try with max_tokens first
+            payload = {**base_payload, "max_tokens": 50}
             response = await client.post(
                 "https://api.openai.com/v1/chat/completions",
                 headers=headers,
@@ -147,6 +148,17 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
                 timeout=30.0,
             )
 
+            # If max_tokens doesn't work, try with max_completion_tokens
+            if response.status_code != 200:
+                logger.info("max_tokens parameter failed, trying max_completion_tokens instead")
+                payload = {**base_payload, "max_completion_tokens": 50}
+                response = await client.post(
+                    "https://api.openai.com/v1/chat/completions",
+                    headers=headers,
+                    json=payload,
+                    timeout=30.0,
+                )
+
             if response.status_code != 200:
                 logger.error(f"OpenAI completion test failed: {response.status_code} - {response.text}")
                 raise Exception(f"OpenAI API error: {response.status_code}")
diff --git a/src/services/models_service.py b/src/services/models_service.py
index 28dee73a..f26d0594 100644
--- a/src/services/models_service.py
+++ b/src/services/models_service.py
@@ -1,6 +1,5 @@
 import httpx
 from typing import Dict, List
-from api.provider_validation import test_embedding
 from utils.container_utils import transform_localhost_url
 from utils.logging_config import get_logger
 
@@ -229,20 +228,14 @@ class ModelsService:
                             f"Model: {model_name}, Capabilities: {capabilities}"
                         )
 
-                        # Check if model has required capabilities
+                        # Check if model has embedding capability
+                        has_embedding = "embedding" in capabilities
+                        # Check if model has required capabilities for language models
                         has_completion = DESIRED_CAPABILITY in capabilities
                         has_tools = TOOL_CALLING_CAPABILITY in capabilities
 
-                        # Check if it's an embedding model
-                        try:
-                            await test_embedding("ollama", endpoint=endpoint, embedding_model=model_name)
-                            is_embedding = True
-                        except Exception as e:
-                            logger.warning(f"Failed to test embedding for model {model_name}: {str(e)}")
-                            is_embedding = False
-
-                        if is_embedding:
-                            # Embedding models only need completion capability
+                        if has_embedding:
+                            # Embedding models have embedding capability
                             embedding_models.append(
                                 {
                                     "value": model_name,
@@ -250,7 +243,7 @@ class ModelsService:
                                     "default": "nomic-embed-text" in model_name.lower(),
                                 }
                             )
-                        elif not is_embedding and has_completion and has_tools:
+                        if has_completion and has_tools:
                             # Language models need both completion and tool calling
                             language_models.append(
                                 {
@@ -333,34 +326,6 @@ class ModelsService:
             if project_id:
                 headers["Project-ID"] = project_id
 
-            # Validate credentials with a minimal completion request
-            async with httpx.AsyncClient() as client:
-                validation_url = f"{watson_endpoint}/ml/v1/text/generation"
-                validation_params = {"version": "2024-09-16"}
-                validation_payload = {
-                    "input": "test",
-                    "model_id": "ibm/granite-3-2b-instruct",
-                    "project_id": project_id,
-                    "parameters": {
-                        "max_new_tokens": 1,
-                    },
-                }
-
-                validation_response = await client.post(
-                    validation_url,
-                    headers=headers,
-                    params=validation_params,
-                    json=validation_payload,
-                    timeout=10.0,
-                )
-
-                if validation_response.status_code != 200:
-                    raise Exception(
-                        f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
-                    )
-
-                logger.info("IBM Watson credentials validated successfully")
-
             # Fetch foundation models using the correct endpoint
             models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
 
@@ -424,6 +389,39 @@ class ModelsService:
                             }
                         )
 
+            # Validate credentials with the first available LLM model
+            if language_models:
+                first_llm_model = language_models[0]["value"]
+                
+                async with httpx.AsyncClient() as client:
+                    validation_url = f"{watson_endpoint}/ml/v1/text/generation"
+                    validation_params = {"version": "2024-09-16"}
+                    validation_payload = {
+                        "input": "test",
+                        "model_id": first_llm_model,
+                        "project_id": project_id,
+                        "parameters": {
+                            "max_new_tokens": 1,
+                        },
+                    }
+
+                    validation_response = await client.post(
+                        validation_url,
+                        headers=headers,
+                        params=validation_params,
+                        json=validation_payload,
+                        timeout=10.0,
+                    )
+
+                    if validation_response.status_code != 200:
+                        raise Exception(
+                            f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
+                        )
+
+                    logger.info(f"IBM Watson credentials validated successfully using model: {first_llm_model}")
+            else:
+                logger.warning("No language models available to validate credentials")
+
             if not language_models and not embedding_models:
                 raise Exception("No IBM models retrieved from API")