Compare commits

Comparing main...fix/agent_ (11 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 2f38b0b190 |  |
|  | a2e5baa665 |  |
|  | a2370d378b |  |
|  | c9ec98a77c |  |
|  | 2b225ab554 |  |
|  | 449b5dc38e |  |
|  | 74b9544515 |  |
|  | a525b07f6f |  |
|  | 6edddbebca |  |
|  | 65a044d14d |  |
|  | 1f4743ceab |  |
5 changed files with 60 additions and 51 deletions
```diff
@@ -73,13 +73,11 @@ export function IBMOnboarding({
     error: modelsError,
   } = useGetIBMModelsQuery(
     {
-      endpoint: debouncedEndpoint,
-      apiKey: debouncedApiKey,
-      projectId: debouncedProjectId,
-    },
-    {
-      enabled: !!debouncedEndpoint && !!debouncedApiKey && !!debouncedProjectId,
+      endpoint: debouncedEndpoint ? debouncedEndpoint : undefined,
+      apiKey: debouncedApiKey ? debouncedApiKey : undefined,
+      projectId: debouncedProjectId ? debouncedProjectId : undefined,
     },
+    { enabled: !!debouncedEndpoint || !!debouncedApiKey || !!debouncedProjectId || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
```
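Note: the argument change above follows a pattern that recurs in the Ollama and OpenAI hunks below: an empty form field is sent as `undefined` rather than `""`, so the query layer sees the field as absent. A minimal Python sketch of the same normalization idea (names are illustrative, not from this PR):

```python
def normalize(value: str | None) -> str | None:
    """Map empty or missing strings to None so downstream code only
    ever sees present-or-absent, never an empty string."""
    return value if value else None


# An empty field and an untouched field now look identical downstream:
assert normalize("") is None
assert normalize(None) is None
assert normalize("https://us-south.ml.cloud.ibm.com") == "https://us-south.ml.cloud.ibm.com"
```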
```diff
@@ -36,6 +36,7 @@ export function OllamaOnboarding({
     error: modelsError,
   } = useGetOllamaModelsQuery(
     debouncedEndpoint ? { endpoint: debouncedEndpoint } : undefined,
+    { enabled: !!debouncedEndpoint || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
```
```diff
@@ -34,7 +34,7 @@ export function OpenAIOnboarding({
   alreadyConfigured?: boolean;
 }) {
   const [apiKey, setApiKey] = useState("");
-  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey);
+  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey && !alreadyConfigured);
   const debouncedApiKey = useDebouncedValue(apiKey, 500);
 
   // Fetch models from API when API key is provided
```
```diff
@@ -48,7 +48,7 @@ export function OpenAIOnboarding({
       : debouncedApiKey
         ? { apiKey: debouncedApiKey }
         : undefined,
-    { enabled: debouncedApiKey !== "" || getFromEnv },
+    { enabled: debouncedApiKey !== "" || getFromEnv || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
   const {
```
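The second shared frontend change is the `enabled` gate on each models query: it now also fires when the provider is `alreadyConfigured`, so a returning user sees their saved model list without re-entering credentials. The IBM variant additionally relaxes the all-fields requirement from AND to OR. A sketch of that gate as plain boolean logic (the function name is hypothetical):

```python
def should_fetch_ibm_models(
    endpoint: str | None,
    api_key: str | None,
    project_id: str | None,
    already_configured: bool,
) -> bool:
    # Before this PR: all three fields had to be non-empty (AND).
    # After: any single field, or an existing configuration, is enough (OR).
    return bool(endpoint) or bool(api_key) or bool(project_id) or already_configured


# A previously configured provider fetches models with no new input:
assert should_fetch_ibm_models(None, None, None, already_configured=True)
```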
```diff
@@ -112,7 +112,7 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> None:
     }
 
     # Simple tool calling test
-    payload = {
+    base_payload = {
         "model": llm_model,
         "messages": [
             {"role": "user", "content": "What tools do you have available?"}
```
```diff
@@ -136,10 +136,11 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> None:
                 }
             }
         ],
-        "max_tokens": 50,
     }
 
     async with httpx.AsyncClient() as client:
+        # Try with max_tokens first
+        payload = {**base_payload, "max_tokens": 50}
         response = await client.post(
             "https://api.openai.com/v1/chat/completions",
             headers=headers,
```
```diff
@@ -147,6 +148,17 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> None:
             timeout=30.0,
         )
 
+        # If max_tokens doesn't work, try with max_completion_tokens
+        if response.status_code != 200:
+            logger.info("max_tokens parameter failed, trying max_completion_tokens instead")
+            payload = {**base_payload, "max_completion_tokens": 50}
+            response = await client.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=30.0,
+            )
+
         if response.status_code != 200:
             logger.error(f"OpenAI completion test failed: {response.status_code} - {response.text}")
             raise Exception(f"OpenAI API error: {response.status_code}")
```
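Context for this retry: newer OpenAI models accept only `max_completion_tokens` and reject the legacy `max_tokens` parameter, so the test tries the legacy name first and falls back on a non-200 response. A standalone sketch of the pattern (endpoint and parameters mirror the diff; error handling is reduced to the status check):

```python
import httpx


async def completion_with_token_param_fallback(api_key: str, model: str) -> httpx.Response:
    """Try the legacy max_tokens parameter first, then retry with
    max_completion_tokens, which newer models require."""
    headers = {"Authorization": f"Bearer {api_key}"}
    base_payload = {
        "model": model,
        "messages": [{"role": "user", "content": "ping"}],
    }
    async with httpx.AsyncClient() as client:
        for token_param in ("max_tokens", "max_completion_tokens"):
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json={**base_payload, token_param: 50},
                timeout=30.0,
            )
            if response.status_code == 200:
                break  # first accepted parameter name wins
        return response


# Usage (needs a real key):
# asyncio.run(completion_with_token_param_fallback("sk-...", "gpt-4o-mini"))
```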
```diff
@@ -1,6 +1,5 @@
 import httpx
 from typing import Dict, List
-from api.provider_validation import test_embedding
 from utils.container_utils import transform_localhost_url
 from utils.logging_config import get_logger
 
```
```diff
@@ -229,20 +228,14 @@ class ModelsService:
                 f"Model: {model_name}, Capabilities: {capabilities}"
             )
 
-            # Check if model has required capabilities
+            # Check if model has embedding capability
+            has_embedding = "embedding" in capabilities
+            # Check if model has required capabilities for language models
             has_completion = DESIRED_CAPABILITY in capabilities
             has_tools = TOOL_CALLING_CAPABILITY in capabilities
 
-            # Check if it's an embedding model
-            try:
-                await test_embedding("ollama", endpoint=endpoint, embedding_model=model_name)
-                is_embedding = True
-            except Exception as e:
-                logger.warning(f"Failed to test embedding for model {model_name}: {str(e)}")
-                is_embedding = False
-
-            if is_embedding:
-                # Embedding models only need completion capability
+            if has_embedding:
+                # Embedding models have embedding capability
                 embedding_models.append(
                     {
                         "value": model_name,
```
```diff
@@ -250,7 +243,7 @@ class ModelsService:
                         "default": "nomic-embed-text" in model_name.lower(),
                     }
                 )
-            elif not is_embedding and has_completion and has_tools:
+            if has_completion and has_tools:
                 # Language models need both completion and tool calling
                 language_models.append(
                     {
```
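The net effect of the two ModelsService hunks above: embedding detection no longer issues a live `test_embedding` request per model (hence the dropped import in the first hunk of this file); it just inspects the capability list, and the branch change from `elif` to `if` lets a model with both capability sets land in both lists. A reduced sketch of the classification; the capability constants' values are assumptions (the diff only shows their names), and the model inventory is made up:

```python
# Assumed values; the diff shows only the constant names.
DESIRED_CAPABILITY = "text_generation"
TOOL_CALLING_CAPABILITY = "tool_calling"


def classify(models: dict[str, list[str]]) -> tuple[list[str], list[str]]:
    """Split models into embedding and language lists by capability strings."""
    embedding_models: list[str] = []
    language_models: list[str] = []
    for model_name, capabilities in models.items():
        if "embedding" in capabilities:
            embedding_models.append(model_name)
        # 'if' rather than 'elif': a model may appear in both lists.
        if DESIRED_CAPABILITY in capabilities and TOOL_CALLING_CAPABILITY in capabilities:
            language_models.append(model_name)
    return embedding_models, language_models


# Made-up inventory for illustration:
emb, llm = classify({
    "nomic-embed-text": ["embedding"],
    "llama3.1": ["text_generation", "tool_calling"],
})
assert emb == ["nomic-embed-text"] and llm == ["llama3.1"]
```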
```diff
@@ -333,34 +326,6 @@ class ModelsService:
         if project_id:
             headers["Project-ID"] = project_id
 
-        # Validate credentials with a minimal completion request
-        async with httpx.AsyncClient() as client:
-            validation_url = f"{watson_endpoint}/ml/v1/text/generation"
-            validation_params = {"version": "2024-09-16"}
-            validation_payload = {
-                "input": "test",
-                "model_id": "ibm/granite-3-2b-instruct",
-                "project_id": project_id,
-                "parameters": {
-                    "max_new_tokens": 1,
-                },
-            }
-
-            validation_response = await client.post(
-                validation_url,
-                headers=headers,
-                params=validation_params,
-                json=validation_payload,
-                timeout=10.0,
-            )
-
-            if validation_response.status_code != 200:
-                raise Exception(
-                    f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
-                )
-
-            logger.info("IBM Watson credentials validated successfully")
-
         # Fetch foundation models using the correct endpoint
         models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
 
```
```diff
@@ -424,6 +389,39 @@ class ModelsService:
                 }
             )
 
+        # Validate credentials with the first available LLM model
+        if language_models:
+            first_llm_model = language_models[0]["value"]
+
+            async with httpx.AsyncClient() as client:
+                validation_url = f"{watson_endpoint}/ml/v1/text/generation"
+                validation_params = {"version": "2024-09-16"}
+                validation_payload = {
+                    "input": "test",
+                    "model_id": first_llm_model,
+                    "project_id": project_id,
+                    "parameters": {
+                        "max_new_tokens": 1,
+                    },
+                }
+
+                validation_response = await client.post(
+                    validation_url,
+                    headers=headers,
+                    params=validation_params,
+                    json=validation_payload,
+                    timeout=10.0,
+                )
+
+                if validation_response.status_code != 200:
+                    raise Exception(
+                        f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
+                    )
+
+                logger.info(f"IBM Watson credentials validated successfully using model: {first_llm_model}")
+        else:
+            logger.warning("No language models available to validate credentials")
+
         if not language_models and not embedding_models:
             raise Exception("No IBM models retrieved from API")
```
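Taken together, the last two hunks move credential validation from before model discovery, where it pinned the hardcoded `ibm/granite-3-2b-instruct`, to after it, validating with whatever LLM the account actually exposes. A condensed, self-contained sketch of the new validation step (the surrounding service method is stripped away):

```python
import httpx


async def validate_watson_credentials(
    watson_endpoint: str,
    headers: dict[str, str],
    project_id: str,
    language_models: list[dict[str, str]],
) -> None:
    """Validate against the first model the account can actually use,
    instead of assuming a hardcoded model is available."""
    if not language_models:
        return  # caller logs a warning and decides whether this is fatal
    first_llm_model = language_models[0]["value"]
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{watson_endpoint}/ml/v1/text/generation",
            headers=headers,
            params={"version": "2024-09-16"},
            json={
                "input": "test",
                "model_id": first_llm_model,
                "project_id": project_id,
                "parameters": {"max_new_tokens": 1},
            },
            timeout=10.0,
        )
        if response.status_code != 200:
            raise Exception(
                f"Invalid credentials or endpoint: {response.status_code} - {response.text}"
            )
```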