Compare commits
11 commits
main...fix/agent_
| Author | SHA1 | Date |
|---|---|---|
| | 2f38b0b190 | |
| | a2e5baa665 | |
| | a2370d378b | |
| | c9ec98a77c | |
| | 2b225ab554 | |
| | 449b5dc38e | |
| | 74b9544515 | |
| | a525b07f6f | |
| | 6edddbebca | |
| | 65a044d14d | |
| | 1f4743ceab | |
5 changed files with 60 additions and 51 deletions
```diff
@@ -73,13 +73,11 @@ export function IBMOnboarding({
     error: modelsError,
   } = useGetIBMModelsQuery(
     {
-      endpoint: debouncedEndpoint,
-      apiKey: debouncedApiKey,
-      projectId: debouncedProjectId,
-    },
-    {
-      enabled: !!debouncedEndpoint && !!debouncedApiKey && !!debouncedProjectId,
+      endpoint: debouncedEndpoint ? debouncedEndpoint : undefined,
+      apiKey: debouncedApiKey ? debouncedApiKey : undefined,
+      projectId: debouncedProjectId ? debouncedProjectId : undefined,
     },
+    { enabled: !!debouncedEndpoint || !!debouncedApiKey || !!debouncedProjectId || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
```
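Net effect of this hunk: each credential is passed through individually, with empty values normalized to `undefined`, and the query's `enabled` gate changes from requiring all three credentials to firing on any one of them, or when the provider is `alreadyConfigured`. A minimal sketch of the two gates as plain predicates (function and parameter names are illustrative, not from the codebase):

```python
def old_enabled(endpoint: str, api_key: str, project_id: str) -> bool:
    # Old gate: fetch models only once ALL three credentials are present.
    return bool(endpoint) and bool(api_key) and bool(project_id)


def new_enabled(endpoint: str, api_key: str, project_id: str,
                already_configured: bool) -> bool:
    # New gate: fetch as soon as ANY credential is present, or when the
    # provider was configured in a previous session.
    return bool(endpoint) or bool(api_key) or bool(project_id) or already_configured


# The saved-configuration case the change appears to target:
# nothing re-entered, yet the model list still loads.
assert not old_enabled("", "", "")
assert new_enabled("", "", "", already_configured=True)
```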
```diff
@@ -36,6 +36,7 @@ export function OllamaOnboarding({
     error: modelsError,
   } = useGetOllamaModelsQuery(
     debouncedEndpoint ? { endpoint: debouncedEndpoint } : undefined,
+    { enabled: !!debouncedEndpoint || alreadyConfigured },
   );
 
   // Use custom hook for model selection logic
```
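The same `alreadyConfigured` escape hatch as the IBM hunk above: the Ollama models query now also runs for a previously configured provider, before the user re-enters an endpoint.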
```diff
@@ -34,7 +34,7 @@ export function OpenAIOnboarding({
   alreadyConfigured?: boolean;
 }) {
   const [apiKey, setApiKey] = useState("");
-  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey);
+  const [getFromEnv, setGetFromEnv] = useState(hasEnvApiKey && !alreadyConfigured);
   const debouncedApiKey = useDebouncedValue(apiKey, 500);
 
   // Fetch models from API when API key is provided
```
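`getFromEnv` no longer defaults to true for an already-configured provider, presumably so the environment-variable toggle does not shadow a key saved in an earlier session.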
```diff
@@ -48,7 +48,7 @@ export function OpenAIOnboarding({
       : debouncedApiKey
         ? { apiKey: debouncedApiKey }
         : undefined,
-    { enabled: debouncedApiKey !== "" || getFromEnv },
+    { enabled: debouncedApiKey !== "" || getFromEnv || alreadyConfigured },
   );
   // Use custom hook for model selection logic
   const {
```
```diff
@@ -112,7 +112,7 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> None:
     }
 
     # Simple tool calling test
-    payload = {
+    base_payload = {
         "model": llm_model,
         "messages": [
             {"role": "user", "content": "What tools do you have available?"}
```
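The rename from `payload` to `base_payload` sets up the two hunks below, where token-limit variants are spread into the shared base; see the sketch after the retry hunk.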
```diff
@@ -136,10 +136,11 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> None:
                 }
             }
         ],
-        "max_tokens": 50,
     }
 
     async with httpx.AsyncClient() as client:
+        # Try with max_tokens first
+        payload = {**base_payload, "max_tokens": 50}
         response = await client.post(
             "https://api.openai.com/v1/chat/completions",
             headers=headers,
```
```diff
@@ -147,6 +148,17 @@ async def _test_openai_completion_with_tools(api_key: str, llm_model: str) -> None:
             timeout=30.0,
         )
 
+        # If max_tokens doesn't work, try with max_completion_tokens
+        if response.status_code != 200:
+            logger.info("max_tokens parameter failed, trying max_completion_tokens instead")
+            payload = {**base_payload, "max_completion_tokens": 50}
+            response = await client.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers=headers,
+                json=payload,
+                timeout=30.0,
+            )
+
         if response.status_code != 200:
             logger.error(f"OpenAI completion test failed: {response.status_code} - {response.text}")
             raise Exception(f"OpenAI API error: {response.status_code}")
```
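Taken together, the three hunks above implement a one-shot parameter fallback: newer OpenAI models reject `max_tokens` in favor of `max_completion_tokens`, so a failed first attempt is retried once with the other name. A condensed, self-contained sketch of the pattern (the endpoint matches the diff; the function name and payload contents here are placeholders):

```python
import httpx


async def post_with_token_param_fallback(api_key: str, base_payload: dict) -> httpx.Response:
    headers = {"Authorization": f"Bearer {api_key}"}
    async with httpx.AsyncClient() as client:
        # First attempt: the older max_tokens parameter.
        response = await client.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json={**base_payload, "max_tokens": 50},
            timeout=30.0,
        )
        if response.status_code != 200:
            # Retry once with the newer parameter name.
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json={**base_payload, "max_completion_tokens": 50},
                timeout=30.0,
            )
        return response
```

One tradeoff visible in the diff: any non-200 status triggers the retry, not just the specific unsupported-parameter error, so a request that fails for other reasons (a bad key, for instance) is sent twice.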
```diff
@@ -1,6 +1,5 @@
 import httpx
 from typing import Dict, List
-from api.provider_validation import test_embedding
 from utils.container_utils import transform_localhost_url
 from utils.logging_config import get_logger
 
```
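`test_embedding` is no longer imported because, as the next hunk shows, embedding support is now read from each model's capability metadata instead of being probed with a live embedding request.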
```diff
@@ -229,20 +228,14 @@ class ModelsService:
                 f"Model: {model_name}, Capabilities: {capabilities}"
             )
 
-            # Check if model has required capabilities
+            # Check if model has embedding capability
+            has_embedding = "embedding" in capabilities
+            # Check if model has required capabilities for language models
             has_completion = DESIRED_CAPABILITY in capabilities
             has_tools = TOOL_CALLING_CAPABILITY in capabilities
 
-            # Check if it's an embedding model
-            try:
-                await test_embedding("ollama", endpoint=endpoint, embedding_model=model_name)
-                is_embedding = True
-            except Exception as e:
-                logger.warning(f"Failed to test embedding for model {model_name}: {str(e)}")
-                is_embedding = False
-
-            if is_embedding:
-                # Embedding models only need completion capability
+            if has_embedding:
+                # Embedding models have embedding capability
                 embedding_models.append(
                     {
                         "value": model_name,
```
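This drops one network round-trip per model, plus the warning that was logged for every non-embedding model, since the `capabilities` list already fetched for each model now decides the bucket.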
```diff
@@ -250,7 +243,7 @@ class ModelsService:
                         "default": "nomic-embed-text" in model_name.lower(),
                     }
                 )
-            elif not is_embedding and has_completion and has_tools:
+            if has_completion and has_tools:
                 # Language models need both completion and tool calling
                 language_models.append(
                     {
```
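Note the `elif` becoming an `if`: a model advertising embedding, completion, and tool-calling capabilities can now land in both buckets, which the old `not is_embedding` guard ruled out. A standalone sketch of the new rule (the two capability constants appear in the diff by name only; their string values here are assumptions):

```python
DESIRED_CAPABILITY = "completion"   # assumed value; the diff shows only the name
TOOL_CALLING_CAPABILITY = "tools"   # assumed value; the diff shows only the name


def classify(capabilities: list[str]) -> list[str]:
    buckets = []
    if "embedding" in capabilities:
        buckets.append("embedding")
    # Independent check, not elif: both buckets are possible.
    if DESIRED_CAPABILITY in capabilities and TOOL_CALLING_CAPABILITY in capabilities:
        buckets.append("language")
    return buckets


assert classify(["embedding"]) == ["embedding"]
assert classify(["embedding", "completion", "tools"]) == ["embedding", "language"]
```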
```diff
@@ -333,34 +326,6 @@ class ModelsService:
         if project_id:
             headers["Project-ID"] = project_id
 
-        # Validate credentials with a minimal completion request
-        async with httpx.AsyncClient() as client:
-            validation_url = f"{watson_endpoint}/ml/v1/text/generation"
-            validation_params = {"version": "2024-09-16"}
-            validation_payload = {
-                "input": "test",
-                "model_id": "ibm/granite-3-2b-instruct",
-                "project_id": project_id,
-                "parameters": {
-                    "max_new_tokens": 1,
-                },
-            }
-
-            validation_response = await client.post(
-                validation_url,
-                headers=headers,
-                params=validation_params,
-                json=validation_payload,
-                timeout=10.0,
-            )
-
-            if validation_response.status_code != 200:
-                raise Exception(
-                    f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
-                )
-
-            logger.info("IBM Watson credentials validated successfully")
-
         # Fetch foundation models using the correct endpoint
         models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
 
```
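The up-front credential check pinned to the hardcoded `ibm/granite-3-2b-instruct` model is removed here; the next hunk re-adds validation after the model list has been fetched, against a model that is known to exist.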
```diff
@@ -424,6 +389,39 @@ class ModelsService:
                 }
             )
 
+        # Validate credentials with the first available LLM model
+        if language_models:
+            first_llm_model = language_models[0]["value"]
+
+            async with httpx.AsyncClient() as client:
+                validation_url = f"{watson_endpoint}/ml/v1/text/generation"
+                validation_params = {"version": "2024-09-16"}
+                validation_payload = {
+                    "input": "test",
+                    "model_id": first_llm_model,
+                    "project_id": project_id,
+                    "parameters": {
+                        "max_new_tokens": 1,
+                    },
+                }
+
+                validation_response = await client.post(
+                    validation_url,
+                    headers=headers,
+                    params=validation_params,
+                    json=validation_payload,
+                    timeout=10.0,
+                )
+
+                if validation_response.status_code != 200:
+                    raise Exception(
+                        f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
+                    )
+
+                logger.info(f"IBM Watson credentials validated successfully using model: {first_llm_model}")
+        else:
+            logger.warning("No language models available to validate credentials")
+
         if not language_models and not embedding_models:
             raise Exception("No IBM models retrieved from API")
 
```
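Net effect of the last two hunks: credential validation moves from before the fetch, against a hardcoded Granite model that may not be available to every project, to after it, against the first LLM the API actually returned. One gap worth noting: when only embedding models come back, the `else` branch merely logs a warning, credentials go unvalidated, and the final "No IBM models retrieved" check does not fire.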