Compare commits

...
Sign in to create a new pull request.

11 commits

Author SHA1 Message Date
Lucas Oliveira
2f38b0b190 fix max_tokens error on gpt 4o 2025-11-14 18:02:56 -03:00
Lucas Oliveira
a2e5baa665 Simplify ollama models validation 2025-11-14 18:01:13 -03:00
Lucas Oliveira
a2370d378b just enable get from env when not configured 2025-11-14 17:56:50 -03:00
Lucas Oliveira
c9ec98a77c enable get openai models query when already configured 2025-11-14 17:56:28 -03:00
Lucas Oliveira
2b225ab554 enable ollama query only when configured or endpoint present 2025-11-14 17:55:52 -03:00
Lucas Oliveira
449b5dc38e make ibm query disabled when not configured 2025-11-14 17:54:59 -03:00
Lucas Oliveira
74b9544515 fixed ibm onboarding to not disable query when no data is available 2025-11-14 17:54:26 -03:00
Lucas Oliveira
a525b07f6f Fixed models service to try api key with first available model 2025-11-14 17:54:13 -03:00
Lucas Oliveira
6edddbebca Merge branch 'main' into fix/agent_thinking 2025-11-14 17:50:18 -03:00
Lucas Oliveira
65a044d14d fixed ibm and ollama overwriting values 2025-11-14 17:38:59 -03:00
Lucas Oliveira
1f4743ceab Added thinking message to assistant message 2025-11-14 17:29:44 -03:00
5 changed files with 60 additions and 51 deletions

View file

@@ -73,13 +73,11 @@ export function IBMOnboarding({
error: modelsError,
} = useGetIBMModelsQuery(
{
endpoint: debouncedEndpoint,
apiKey: debouncedApiKey,
projectId: debouncedProjectId,
},
{
enabled: !!debouncedEndpoint && !!debouncedApiKey && !!debouncedProjectId,
endpoint: debouncedEndpoint ? debouncedEndpoint : undefined,
apiKey: debouncedApiKey ? debouncedApiKey : undefined,
projectId: debouncedProjectId ? debouncedProjectId : undefined,
},
{ enabled: !!debouncedEndpoint || !!debouncedApiKey || !!debouncedProjectId || alreadyConfigured },
);
// Use custom hook for model selection logic

View file

@@ -36,6 +36,7 @@ export function OllamaOnboarding({
error: modelsError,
} = useGetOllamaModelsQuery(
debouncedEndpoint ? { endpoint: debouncedEndpoint } : undefined,
{ enabled: !!debouncedEndpoint || alreadyConfigured },
);
// Use custom hook for model selection logic

View file

@@ -34,7 +34,7 @@ export function OpenAIOnboarding({
alreadyConfigured?: boolean;
}) {
const [apiKey, setApiKey] = useState("");
const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey);
const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey && !alreadyConfigured);
const debouncedApiKey = useDebouncedValue(apiKey, 500);
// Fetch models from API when API key is provided
@@ -48,7 +48,7 @@ export function OpenAIOnboarding({
: debouncedApiKey
? { apiKey: debouncedApiKey }
: undefined,
{ enabled: debouncedApiKey !== "" || getFromEnv },
{ enabled: debouncedApiKey !== "" || getFromEnv || alreadyConfigured },
);
// Use custom hook for model selection logic
const {

View file

@@ -112,7 +112,7 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
}
# Simple tool calling test
payload = {
base_payload = {
"model": llm_model,
"messages": [
{"role": "user", "content": "What tools do you have available?"}
@@ -136,10 +136,11 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
}
}
],
"max_tokens": 50,
}
async with httpx.AsyncClient() as client:
# Try with max_tokens first
payload = {**base_payload, "max_tokens": 50}
response = await client.post(
"https://api.openai.com/v1/chat/completions",
headers=headers,
@@ -147,6 +148,17 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> No
timeout=30.0,
)
# If max_tokens doesn't work, try with max_completion_tokens
if response.status_code != 200:
logger.info("max_tokens parameter failed, trying max_completion_tokens instead")
payload = {**base_payload, "max_completion_tokens": 50}
response = await client.post(
"https://api.openai.com/v1/chat/completions",
headers=headers,
json=payload,
timeout=30.0,
)
if response.status_code != 200:
logger.error(f"OpenAI completion test failed: {response.status_code} - {response.text}")
raise Exception(f"OpenAI API error: {response.status_code}")

View file

@@ -1,6 +1,5 @@
import httpx
from typing import Dict, List
from api.provider_validation import test_embedding
from utils.container_utils import transform_localhost_url
from utils.logging_config import get_logger
@@ -229,20 +228,14 @@ class ModelsService:
f"Model: {model_name}, Capabilities: {capabilities}"
)
# Check if model has required capabilities
# Check if model has embedding capability
has_embedding = "embedding" in capabilities
# Check if model has required capabilities for language models
has_completion = DESIRED_CAPABILITY in capabilities
has_tools = TOOL_CALLING_CAPABILITY in capabilities
# Check if it's an embedding model
try:
await test_embedding("ollama", endpoint=endpoint, embedding_model=model_name)
is_embedding = True
except Exception as e:
logger.warning(f"Failed to test embedding for model {model_name}: {str(e)}")
is_embedding = False
if is_embedding:
# Embedding models only need completion capability
if has_embedding:
# Embedding models have embedding capability
embedding_models.append(
{
"value": model_name,
@@ -250,7 +243,7 @@
"default": "nomic-embed-text" in model_name.lower(),
}
)
elif not is_embedding and has_completion and has_tools:
if has_completion and has_tools:
# Language models need both completion and tool calling
language_models.append(
{
@@ -333,34 +326,6 @@
if project_id:
headers["Project-ID"] = project_id
# Validate credentials with a minimal completion request
async with httpx.AsyncClient() as client:
validation_url = f"{watson_endpoint}/ml/v1/text/generation"
validation_params = {"version": "2024-09-16"}
validation_payload = {
"input": "test",
"model_id": "ibm/granite-3-2b-instruct",
"project_id": project_id,
"parameters": {
"max_new_tokens": 1,
},
}
validation_response = await client.post(
validation_url,
headers=headers,
params=validation_params,
json=validation_payload,
timeout=10.0,
)
if validation_response.status_code != 200:
raise Exception(
f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
)
logger.info("IBM Watson credentials validated successfully")
# Fetch foundation models using the correct endpoint
models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
@@ -424,6 +389,39 @@
}
)
# Validate credentials with the first available LLM model
if language_models:
first_llm_model = language_models[0]["value"]
async with httpx.AsyncClient() as client:
validation_url = f"{watson_endpoint}/ml/v1/text/generation"
validation_params = {"version": "2024-09-16"}
validation_payload = {
"input": "test",
"model_id": first_llm_model,
"project_id": project_id,
"parameters": {
"max_new_tokens": 1,
},
}
validation_response = await client.post(
validation_url,
headers=headers,
params=validation_params,
json=validation_payload,
timeout=10.0,
)
if validation_response.status_code != 200:
raise Exception(
f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
)
logger.info(f"IBM Watson credentials validated successfully using model: {first_llm_model}")
else:
logger.warning("No language models available to validate credentials")
if not language_models and not embedding_models:
raise Exception("No IBM models retrieved from API")