# openrag/src/services/models_service.py
import httpx
from typing import Any, Dict, List, Optional

from api.provider_validation import test_embedding
from utils.container_utils import transform_localhost_url
from utils.logging_config import get_logger

logger = get_logger(__name__)


class ModelsService:
    """Service for fetching available models from different AI providers"""

    OPENAI_TOOL_CALLING_MODELS = [
        "gpt-5",
        "gpt-5-mini",
        "gpt-5-nano",
        "gpt-4o-mini",
        "gpt-4o",
        "gpt-4.1",
        "gpt-4.1-mini",
        "gpt-4.1-nano",
        "gpt-4-turbo",
        "gpt-4-turbo-preview",
        "gpt-4",
        "gpt-3.5-turbo",
        "o1",
        "o3-mini",
        "o3",
        "o3-pro",
        "o4-mini",
        "o4-mini-high",
    ]
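    # NOTE: this allowlist is maintained by hand; OpenAI models released after
    # it was written will not be offered until they are added here.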

    def __init__(self):
        self.session_manager = None

    async def get_openai_models(
        self, api_key: str
    ) -> Dict[str, List[Dict[str, Any]]]:
        """Fetch available models from the OpenAI API."""
        try:
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    "https://api.openai.com/v1/models", headers=headers, timeout=10.0
                )
                if response.status_code == 200:
                    data = response.json()
                    models = data.get("data", [])

                    # Split the catalog into language and embedding models
                    language_models = []
                    embedding_models = []

                    for model in models:
                        model_id = model.get("id", "")

                        # Language models: only those known to support tool calling
                        if model_id in self.OPENAI_TOOL_CALLING_MODELS:
                            language_models.append(
                                {
                                    "value": model_id,
                                    "label": model_id,
                                    "default": model_id == "gpt-5",
                                }
                            )
                        # Embedding models
                        elif "text-embedding" in model_id:
                            embedding_models.append(
                                {
                                    "value": model_id,
                                    "label": model_id,
                                    "default": model_id == "text-embedding-3-small",
                                }
                            )

                    # Sort by name, with the default model first
                    language_models.sort(
                        key=lambda x: (not x.get("default", False), x["value"])
                    )
                    embedding_models.sort(
                        key=lambda x: (not x.get("default", False), x["value"])
                    )

                    return {
                        "language_models": language_models,
                        "embedding_models": embedding_models,
                    }
                else:
                    logger.error(
                        f"Failed to fetch OpenAI models: {response.status_code}"
                    )
                    raise Exception(
                        f"OpenAI API returned status code {response.status_code}"
                    )
        except Exception as e:
            logger.error(f"Error fetching OpenAI models: {str(e)}")
            raise
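
    # Usage sketch for the method above (illustrative, not part of the service;
    # assumes an asyncio entry point and an OPENAI_API_KEY environment variable):
    #
    #     import asyncio, os
    #
    #     async def main():
    #         service = ModelsService()
    #         models = await service.get_openai_models(os.environ["OPENAI_API_KEY"])
    #         print([m["value"] for m in models["language_models"]])
    #
    #     asyncio.run(main())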

    async def get_ollama_models(
        self, endpoint: Optional[str] = None
    ) -> Dict[str, List[Dict[str, Any]]]:
        """Fetch available models from the Ollama API, keeping only language
        models that support tool calling."""
        try:
            # Rewrite localhost URLs so the endpoint is reachable from inside a container
            ollama_url = transform_localhost_url(endpoint)

            # API endpoints
            tags_url = f"{ollama_url}/api/tags"
            show_url = f"{ollama_url}/api/show"

            # Constants for JSON parsing
            JSON_MODELS_KEY = "models"
            JSON_NAME_KEY = "name"
            JSON_CAPABILITIES_KEY = "capabilities"
            DESIRED_CAPABILITY = "completion"
            TOOL_CALLING_CAPABILITY = "tools"

            async with httpx.AsyncClient() as client:
                # Fetch available models
                tags_response = await client.get(tags_url, timeout=10.0)
                tags_response.raise_for_status()
                models_data = tags_response.json()
                logger.debug(f"Available models: {models_data}")

                # Filter models based on capabilities
                language_models = []
                embedding_models = []
                models = models_data.get(JSON_MODELS_KEY, [])

                for model in models:
                    model_name = model.get(JSON_NAME_KEY, "")
                    if not model_name:
                        continue

                    logger.debug(f"Checking model: {model_name}")

                    # Check model capabilities via /api/show
                    payload = {"model": model_name}
                    try:
                        show_response = await client.post(
                            show_url, json=payload, timeout=10.0
                        )
                        show_response.raise_for_status()
                        json_data = show_response.json()
                        capabilities = json_data.get(JSON_CAPABILITIES_KEY, [])
                        logger.debug(
                            f"Model: {model_name}, Capabilities: {capabilities}"
                        )

                        has_completion = DESIRED_CAPABILITY in capabilities
                        has_tools = TOOL_CALLING_CAPABILITY in capabilities

                        # Probe the model with a real embedding request; a
                        # failure means it is not an embedding model
                        try:
                            await test_embedding(
                                "ollama", endpoint=endpoint, embedding_model=model_name
                            )
                            is_embedding = True
                        except Exception as e:
                            logger.warning(
                                f"Failed to test embedding for model {model_name}: {str(e)}"
                            )
                            is_embedding = False

                        if is_embedding:
                            embedding_models.append(
                                {
                                    "value": model_name,
                                    "label": model_name,
                                    "default": "nomic-embed-text" in model_name.lower(),
                                }
                            )
                        elif has_completion and has_tools:
                            # Language models need both completion and tool calling
                            language_models.append(
                                {
                                    "value": model_name,
                                    "label": model_name,
                                    "default": "gpt-oss" in model_name.lower(),
                                }
                            )
                    except Exception as e:
                        logger.warning(
                            f"Failed to check capabilities for model {model_name}: {str(e)}"
                        )
                        continue

                # Remove duplicates (keyed by model name) and sort
                language_models = list(
                    {m["value"]: m for m in language_models}.values()
                )
                embedding_models = list(
                    {m["value"]: m for m in embedding_models}.values()
                )
                language_models.sort(
                    key=lambda x: (not x.get("default", False), x["value"])
                )
                embedding_models.sort(key=lambda x: x["value"])

                logger.info(
                    f"Found {len(language_models)} language models with tool calling "
                    f"and {len(embedding_models)} embedding models"
                )

                return {
                    "language_models": language_models,
                    "embedding_models": embedding_models,
                }
        except Exception as e:
            logger.error(f"Error fetching Ollama models: {str(e)}")
            raise
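
    # Example return shape of get_ollama_models (derived from the code above;
    # the model names are illustrative):
    #
    #     {
    #         "language_models": [
    #             {"value": "llama3.1:8b", "label": "llama3.1:8b", "default": False},
    #         ],
    #         "embedding_models": [
    #             {"value": "nomic-embed-text", "label": "nomic-embed-text", "default": True},
    #         ],
    #     }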

    async def get_ibm_models(
        self,
        endpoint: Optional[str] = None,
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> Dict[str, List[Dict[str, Any]]]:
        """Fetch available models from the IBM watsonx.ai API."""
        try:
            watson_endpoint = endpoint

            # Exchange the API key for a bearer token via IBM IAM
            bearer_token = None
            if api_key:
                async with httpx.AsyncClient() as client:
                    token_response = await client.post(
                        "https://iam.cloud.ibm.com/identity/token",
                        headers={"Content-Type": "application/x-www-form-urlencoded"},
                        data={
                            "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
                            "apikey": api_key,
                        },
                        timeout=10.0,
                    )
                    if token_response.status_code != 200:
                        raise Exception(
                            f"Failed to get IBM IAM token: {token_response.status_code} - {token_response.text}"
                        )
                    token_data = token_response.json()
                    bearer_token = token_data.get("access_token")
                    if not bearer_token:
                        raise Exception("No access_token in IBM IAM response")

            # Prepare headers for authentication
            headers = {
                "Content-Type": "application/json",
            }
            if bearer_token:
                headers["Authorization"] = f"Bearer {bearer_token}"
            if project_id:
                headers["Project-ID"] = project_id

            # Validate credentials with a minimal completion request
            async with httpx.AsyncClient() as client:
                validation_url = f"{watson_endpoint}/ml/v1/text/generation"
                validation_params = {"version": "2024-09-16"}
                validation_payload = {
                    "input": "test",
                    "model_id": "ibm/granite-3-2b-instruct",
                    "project_id": project_id,
                    "parameters": {
                        "max_new_tokens": 1,
                    },
                }
                validation_response = await client.post(
                    validation_url,
                    headers=headers,
                    params=validation_params,
                    json=validation_payload,
                    timeout=10.0,
                )
                if validation_response.status_code != 200:
                    raise Exception(
                        f"Invalid credentials or endpoint: {validation_response.status_code} - {validation_response.text}"
                    )
                logger.info("IBM Watson credentials validated successfully")

            # Fetch foundation model specs
            models_url = f"{watson_endpoint}/ml/v1/foundation_model_specs"
            language_models = []
            embedding_models = []

            async with httpx.AsyncClient() as client:
                # Fetch text chat models
                text_params = {
                    "version": "2024-09-16",
                    "filters": "function_text_chat,!lifecycle_withdrawn",
                }
                if project_id:
                    text_params["project_id"] = project_id
                text_response = await client.get(
                    models_url, params=text_params, headers=headers, timeout=10.0
                )
                if text_response.status_code == 200:
                    text_data = text_response.json()
                    text_models = text_data.get("resources", [])
                    for i, model in enumerate(text_models):
                        model_id = model.get("model_id", "")
                        model_name = model.get("name", model_id)
                        language_models.append(
                            {
                                "value": model_id,
                                "label": model_name or model_id,
                                "default": i == 0,  # First model is the default
                            }
                        )

                # Fetch embedding models
                embed_params = {
                    "version": "2024-09-16",
                    "filters": "function_embedding,!lifecycle_withdrawn",
                }
                if project_id:
                    embed_params["project_id"] = project_id
                embed_response = await client.get(
                    models_url, params=embed_params, headers=headers, timeout=10.0
                )
                if embed_response.status_code == 200:
                    embed_data = embed_response.json()
                    embed_models = embed_data.get("resources", [])
                    for i, model in enumerate(embed_models):
                        model_id = model.get("model_id", "")
                        model_name = model.get("name", model_id)
                        embedding_models.append(
                            {
                                "value": model_id,
                                "label": model_name or model_id,
                                "default": i == 0,  # First model is the default
                            }
                        )

            if not language_models and not embedding_models:
                raise Exception("No IBM models retrieved from API")

            return {
                "language_models": language_models,
                "embedding_models": embedding_models,
            }
        except Exception as e:
            logger.error(f"Error fetching IBM models: {str(e)}")
            raise
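
    # Usage sketch for get_ibm_models (illustrative; the endpoint value and
    # environment variable names below are hypothetical):
    #
    #     models = await ModelsService().get_ibm_models(
    #         endpoint="https://us-south.ml.cloud.ibm.com",
    #         api_key=os.environ["IBM_API_KEY"],
    #         project_id=os.environ["IBM_PROJECT_ID"],
    #     )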