import httpx from typing import Dict, List from utils.container_utils import transform_localhost_url from utils.logging_config import get_logger logger = get_logger(__name__) class ModelsService: """Service for fetching available models from different AI providers""" OPENAI_TOOL_CALLING_MODELS = [ "gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4o-mini", "gpt-4o", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-4-turbo", "gpt-4-turbo-preview", "gpt-4", "gpt-3.5-turbo", "o1", "o3-mini", "o3", "o3-pro", "o4-mini", "o4-mini-high", ] ANTHROPIC_MODELS = [ "claude-sonnet-4-5-20250929", "claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-latest", "claude-3-5-sonnet-latest", "claude-3-5-haiku-latest", "claude-3-opus-latest", "claude-3-sonnet-20240229", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", "claude-3-haiku-20240307", ] def __init__(self): self.session_manager = None async def get_openai_models(self, api_key: str) -> Dict[str, List[Dict[str, str]]]: """Fetch available models from OpenAI API with lightweight validation""" try: headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", } async with httpx.AsyncClient() as client: # Lightweight validation: just check if API key is valid # This doesn't consume credits, only validates the key response = await client.get( "https://api.openai.com/v1/models", headers=headers, timeout=10.0 ) if response.status_code == 200: data = response.json() models = data.get("data", []) # Filter for relevant models language_models = [] embedding_models = [] for model in models: model_id = model.get("id", "") # Language models (GPT models) if model_id in self.OPENAI_TOOL_CALLING_MODELS: language_models.append( { "value": model_id, "label": model_id, "default": model_id == "gpt-4o", } ) # Embedding models elif "text-embedding" in model_id: embedding_models.append( { "value": model_id, "label": model_id, "default": model_id == "text-embedding-3-small", } ) # Sort by name and ensure defaults are first language_models.sort( key=lambda x: (not x.get("default", False), x["value"]) ) embedding_models.sort( key=lambda x: (not x.get("default", False), x["value"]) ) logger.info("OpenAI API key validated successfully without consuming credits") return { "language_models": language_models, "embedding_models": embedding_models, } else: logger.error(f"Failed to fetch OpenAI models: {response.status_code}") raise Exception( f"OpenAI API returned status code {response.status_code}, {response.text}" ) except Exception as e: logger.error(f"Error fetching OpenAI models: {str(e)}") raise async def get_anthropic_models(self, api_key: str) -> Dict[str, List[Dict[str, str]]]: """Fetch available models from Anthropic API""" try: headers = { "x-api-key": api_key, "anthropic-version": "2023-06-01", "Content-Type": "application/json", } # Anthropic doesn't have a models list endpoint, so we'll validate the key # and return our curated list of models async with httpx.AsyncClient() as client: # Validate the API key with a minimal messages request validation_payload = { "model": "claude-3-5-haiku-latest", "max_tokens": 1, "messages": [{"role": "user", "content": "test"}], } response = await client.post( "https://api.anthropic.com/v1/messages", headers=headers, json=validation_payload, timeout=10.0, ) if response.status_code == 200: # API key is valid, return our curated list language_models = [] for model_id in self.ANTHROPIC_MODELS: language_models.append( { "value": model_id, "label": model_id, "default": model_id == "claude-sonnet-4-5-20250929", } ) # Sort by default first, then by name language_models.sort( key=lambda x: (not x.get("default", False), x["value"]) ) return { "language_models": language_models, "embedding_models": [], # Anthropic doesn't provide embedding models } else: logger.error(f"Failed to validate Anthropic API key: {response.status_code}") raise Exception( f"Anthropic API returned status code {response.status_code}" ) except Exception as e: logger.error(f"Error fetching Anthropic models: {str(e)}") raise async def get_ollama_models( self, endpoint: str = None ) -> Dict[str, List[Dict[str, str]]]: """Fetch available models from Ollama API with tool calling capabilities for language models""" try: # Use provided endpoint or default ollama_url = transform_localhost_url(endpoint) # API endpoints tags_url = f"{ollama_url}/api/tags" show_url = f"{ollama_url}/api/show" # Constants for JSON parsing JSON_MODELS_KEY = "models" JSON_NAME_KEY = "name" JSON_CAPABILITIES_KEY = "capabilities" DESIRED_CAPABILITY = "completion" TOOL_CALLING_CAPABILITY = "tools" async with httpx.AsyncClient() as client: # Fetch available models tags_response = await client.get(tags_url, timeout=10.0) tags_response.raise_for_status() models_data = tags_response.json() logger.debug(f"Available models: {models_data}") # Filter models based on capabilities language_models = [] embedding_models = [] models = models_data.get(JSON_MODELS_KEY, []) for model in models: model_name = model.get(JSON_NAME_KEY, "") if not model_name: continue logger.debug(f"Checking model: {model_name}") # Check model capabilities payload = {"model": model_name} try: show_response = await client.post( show_url, json=payload, timeout=10.0 ) show_response.raise_for_status() json_data = show_response.json() capabilities = json_data.get(JSON_CAPABILITIES_KEY, []) logger.debug( f"Model: {model_name}, Capabilities: {capabilities}" ) # Check if model has embedding capability has_embedding = "embedding" in capabilities # Check if model has required capabilities for language models has_completion = DESIRED_CAPABILITY in capabilities has_tools = TOOL_CALLING_CAPABILITY in capabilities if has_embedding: # Embedding models have embedding capability embedding_models.append( { "value": model_name, "label": model_name, "default": "nomic-embed-text" in model_name.lower(), } ) if has_completion and has_tools: # Language models need both completion and tool calling language_models.append( { "value": model_name, "label": model_name, "default": "gpt-oss" in model_name.lower(), } ) except Exception as e: logger.warning( f"Failed to check capabilities for model {model_name}: {str(e)}" ) continue # Remove duplicates and sort language_models = list( {m["value"]: m for m in language_models}.values() ) embedding_models = list( {m["value"]: m for m in embedding_models}.values() ) language_models.sort( key=lambda x: (not x.get("default", False), x["value"]) ) embedding_models.sort(key=lambda x: x["value"]) logger.info( f"Found {len(language_models)} language models with tool calling and {len(embedding_models)} embedding models" ) return { "language_models": language_models, "embedding_models": embedding_models, } except Exception as e: logger.error(f"Error fetching Ollama models: {str(e)}") raise async def get_ibm_models( self, endpoint: str = None, api_key: str = None, project_id: str = None ) -> Dict[str, List[Dict[str, str]]]: """Fetch available models from IBM Watson API""" try: # Use provided endpoint or default watson_endpoint = endpoint # Get bearer token from IBM IAM bearer_token = None if api_key: async with httpx.AsyncClient() as client: token_response = await client.post( "https://iam.cloud.ibm.com/identity/token", headers={"Content-Type": "application/x-www-form-urlencoded"}, data={ "grant_type": "urn:ibm:params:oauth:grant-type:apikey", "apikey": api_key, }, timeout=10.0, ) if token_response.status_code != 200: raise Exception( f"Failed to get IBM IAM token: {token_response.status_code} - {token_response.text}" ) token_data = token_response.json() bearer_token = token_data.get("access_token") if not bearer_token: raise Exception("No access_token in IBM IAM response") # Prepare headers for authentication headers = { "Content-Type": "application/json", } if bearer_token: headers["Authorization"] = f"Bearer {bearer_token}" if project_id: headers["Project-ID"] = project_id # Fetch foundation models using the correct endpoint models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs" language_models = [] embedding_models = [] async with httpx.AsyncClient() as client: # Fetch text chat models text_params = { "version": "2024-09-16", "filters": "function_text_chat,!lifecycle_withdrawn", } if project_id: text_params["project_id"] = project_id text_response = await client.get( models_url, params=text_params, headers=headers, timeout=10.0 ) if text_response.status_code == 200: text_data = text_response.json() text_models = text_data.get("resources", []) logger.info(f"Retrieved {len(text_models)} text chat models from Watson API") for i, model in enumerate(text_models): model_id = model.get("model_id", "") model_name = model.get("name", model_id) language_models.append( { "value": model_id, "label": model_name or model_id, "default": i == 0, # First model is default } ) else: logger.warning( f"Failed to retrieve text chat models. Status: {text_response.status_code}, " f"Response: {text_response.text[:200]}" ) # Fetch embedding models embed_params = { "version": "2024-09-16", "filters": "function_embedding,!lifecycle_withdrawn", } if project_id: embed_params["project_id"] = project_id embed_response = await client.get( models_url, params=embed_params, headers=headers, timeout=10.0 ) if embed_response.status_code == 200: embed_data = embed_response.json() embed_models = embed_data.get("resources", []) logger.info(f"Retrieved {len(embed_models)} embedding models from Watson API") for i, model in enumerate(embed_models): model_id = model.get("model_id", "") model_name = model.get("name", model_id) embedding_models.append( { "value": model_id, "label": model_name or model_id, "default": i == 0, # First model is default } ) else: logger.warning( f"Failed to retrieve embedding models. Status: {embed_response.status_code}, " f"Response: {embed_response.text[:200]}" ) # Lightweight validation: API key is already validated by successfully getting bearer token # No need to make a generation request that consumes credits if bearer_token: logger.info("IBM Watson API key validated successfully without consuming credits") else: logger.warning("No bearer token available - API key validation may have failed") if not language_models and not embedding_models: # Provide more specific error message about missing models error_msg = ( "API key is valid, but no models are available. " "This usually means your Watson Machine Learning (WML) project is not properly configured. " "Please ensure: (1) Your watsonx.ai project is associated with a WML service instance, " "and (2) The project has access to foundation models. " "Visit your watsonx.ai project settings to configure the WML service association." ) raise Exception(error_msg) return { "language_models": language_models, "embedding_models": embedding_models, } except Exception as e: logger.error(f"Error fetching IBM models: {str(e)}") raise