Moved embedding dimension constants to config/settings.py

This commit is contained in:
Lucas Oliveira 2025-09-26 12:03:15 -03:00
parent 15def39f3a
commit 07921d69c5
2 changed files with 29 additions and 32 deletions

View file

@ -78,6 +78,31 @@ INDEX_NAME = "documents"
# Default embedding settings. 1536 is also the size listed for EMBED_MODEL in
# OPENAI_EMBEDDING_DIMENSIONS below.
VECTOR_DIM = 1536
EMBED_MODEL = "text-embedding-3-small"

# Embedding vector size keyed by model name, one table per provider.
OPENAI_EMBEDDING_DIMENSIONS = {
    "text-embedding-3-small": 1536,
    "text-embedding-3-large": 3072,
    "text-embedding-ada-002": 1536,
}

OLLAMA_EMBEDDING_DIMENSIONS = {
    "nomic-embed-text": 768,
    "all-minilm": 384,
    "mxbai-embed-large": 1024,
}

WATSONX_EMBEDDING_DIMENSIONS = {
    # IBM models
    "ibm/granite-embedding-107m-multilingual": 384,
    "ibm/granite-embedding-278m-multilingual": 1024,
    "ibm/slate-125m-english-rtrvr": 768,
    "ibm/slate-125m-english-rtrvr-v2": 768,
    "ibm/slate-30m-english-rtrvr": 384,
    "ibm/slate-30m-english-rtrvr-v2": 384,
    # Third-party models
    "intfloat/multilingual-e5-large": 1024,
    "sentence-transformers/all-minilm-l6-v2": 384,
}
INDEX_BODY = {
"settings": {
"index": {"knn": True},

View file

@ -1,3 +1,4 @@
from config.settings import OLLAMA_EMBEDDING_DIMENSIONS, OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS
from utils.logging_config import get_logger
@ -5,48 +6,19 @@ logger = get_logger(__name__)
def get_embedding_dimensions(model_name: str) -> int:
    """Get the embedding dimensions for a given model name.

    The name is looked up in the OpenAI, Ollama and watsonx dimension tables
    imported from ``config.settings``. If the same name appeared in more than
    one table, the later table in that order would take precedence.

    Args:
        model_name: Embedding model identifier, matched exactly (case-sensitive)
            against the table keys.

    Returns:
        The dimension registered for ``model_name``, or ``VECTOR_DIM`` when the
        model is not present in any table (a warning is logged in that case).
    """
    # Single source of truth: the per-provider tables live in config.settings,
    # so no local copies of the model lists are kept here.
    all_models = {
        **OPENAI_EMBEDDING_DIMENSIONS,
        **OLLAMA_EMBEDDING_DIMENSIONS,
        **WATSONX_EMBEDDING_DIMENSIONS,
    }

    if model_name in all_models:
        dimensions = all_models[model_name]
        logger.info(f"Found dimensions for model '{model_name}': {dimensions}")
        return dimensions

    # Unknown model: fall back to the configured default instead of a
    # hard-coded number, and report it exactly once.
    logger.warning(
        f"Unknown embedding model '{model_name}', using default dimensions: {VECTOR_DIM}"
    )
    return VECTOR_DIM
def create_dynamic_index_body(embedding_model: str) -> dict: