Moved consts to settings.py
This commit is contained in:
parent
15def39f3a
commit
07921d69c5
2 changed files with 29 additions and 32 deletions
|
|
@ -78,6 +78,31 @@ INDEX_NAME = "documents"
|
||||||
VECTOR_DIM = 1536
|
VECTOR_DIM = 1536
|
||||||
EMBED_MODEL = "text-embedding-3-small"
|
EMBED_MODEL = "text-embedding-3-small"
|
||||||
|
|
||||||
|
OPENAI_EMBEDDING_DIMENSIONS = {
|
||||||
|
"text-embedding-3-small": 1536,
|
||||||
|
"text-embedding-3-large": 3072,
|
||||||
|
"text-embedding-ada-002": 1536,
|
||||||
|
}
|
||||||
|
|
||||||
|
OLLAMA_EMBEDDING_DIMENSIONS = {
|
||||||
|
"nomic-embed-text": 768,
|
||||||
|
"all-minilm": 384,
|
||||||
|
"mxbai-embed-large": 1024,
|
||||||
|
}
|
||||||
|
|
||||||
|
WATSONX_EMBEDDING_DIMENSIONS = {
|
||||||
|
# IBM Models
|
||||||
|
"ibm/granite-embedding-107m-multilingual": 384,
|
||||||
|
"ibm/granite-embedding-278m-multilingual": 1024,
|
||||||
|
"ibm/slate-125m-english-rtrvr": 768,
|
||||||
|
"ibm/slate-125m-english-rtrvr-v2": 768,
|
||||||
|
"ibm/slate-30m-english-rtrvr": 384,
|
||||||
|
"ibm/slate-30m-english-rtrvr-v2": 384,
|
||||||
|
# Third Party Models
|
||||||
|
"intfloat/multilingual-e5-large": 1024,
|
||||||
|
"sentence-transformers/all-minilm-l6-v2": 384,
|
||||||
|
}
|
||||||
|
|
||||||
INDEX_BODY = {
|
INDEX_BODY = {
|
||||||
"settings": {
|
"settings": {
|
||||||
"index": {"knn": True},
|
"index": {"knn": True},
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
from config.settings import OLLAMA_EMBEDDING_DIMENSIONS, OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -5,48 +6,19 @@ logger = get_logger(__name__)
|
||||||
|
|
||||||
def get_embedding_dimensions(model_name: str) -> int:
|
def get_embedding_dimensions(model_name: str) -> int:
|
||||||
"""Get the embedding dimensions for a given model name."""
|
"""Get the embedding dimensions for a given model name."""
|
||||||
# OpenAI models
|
|
||||||
openai_models = {
|
|
||||||
"text-embedding-3-small": 1536,
|
|
||||||
"text-embedding-3-large": 3072,
|
|
||||||
"text-embedding-ada-002": 1536,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Ollama models (common embedding models)
|
|
||||||
ollama_models = {
|
|
||||||
"nomic-embed-text": 768,
|
|
||||||
"all-minilm": 384,
|
|
||||||
"mxbai-embed-large": 1024,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Watson/IBM models
|
|
||||||
watsonx_models = {
|
|
||||||
# IBM Models
|
|
||||||
"ibm/granite-embedding-107m-multilingual": 384,
|
|
||||||
"ibm/granite-embedding-278m-multilingual": 1024,
|
|
||||||
"ibm/slate-125m-english-rtrvr": 768,
|
|
||||||
"ibm/slate-125m-english-rtrvr-v2": 768,
|
|
||||||
"ibm/slate-30m-english-rtrvr": 384,
|
|
||||||
"ibm/slate-30m-english-rtrvr-v2": 384,
|
|
||||||
# Third Party Models
|
|
||||||
"intfloat/multilingual-e5-large": 1024,
|
|
||||||
"sentence-transformers/all-minilm-l6-v2": 384,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Check all model dictionaries
|
# Check all model dictionaries
|
||||||
all_models = {**openai_models, **ollama_models, **watsonx_models}
|
all_models = {**OPENAI_EMBEDDING_DIMENSIONS, **OLLAMA_EMBEDDING_DIMENSIONS, **WATSONX_EMBEDDING_DIMENSIONS}
|
||||||
|
|
||||||
if model_name in all_models:
|
if model_name in all_models:
|
||||||
dimensions = all_models[model_name]
|
dimensions = all_models[model_name]
|
||||||
logger.info(f"Found dimensions for model '{model_name}': {dimensions}")
|
logger.info(f"Found dimensions for model '{model_name}': {dimensions}")
|
||||||
return dimensions
|
return dimensions
|
||||||
|
|
||||||
# Default fallback
|
|
||||||
default_dimensions = 1536
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Unknown embedding model '{model_name}', using default dimensions: {default_dimensions}"
|
f"Unknown embedding model '{model_name}', using default dimensions: {VECTOR_DIM}"
|
||||||
)
|
)
|
||||||
return default_dimensions
|
return VECTOR_DIM
|
||||||
|
|
||||||
|
|
||||||
def create_dynamic_index_body(embedding_model: str) -> dict:
|
def create_dynamic_index_body(embedding_model: str) -> dict:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue