* add container utils * added localhost url to settings * added localhost_url as a constant * added localhost_url to get settings query * make ollama onboarding have localhost url by default * make endpoint be changed in models service and in onboarding backend instead of onboarding screen * fixed embedding dimensions to get stripped model * make config come as localhost but global variable be set as the transformed endpoint * remove setting ollama url since it comes from the global variable * use localhost again on ollama --------- Co-authored-by: Lucas Oliveira <lucas.edu.oli@hotmail.com>
66 lines
No EOL
2.4 KiB
Python
66 lines
No EOL
2.4 KiB
Python
from config.settings import OLLAMA_EMBEDDING_DIMENSIONS, OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS
|
|
from utils.logging_config import get_logger
|
|
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
def get_embedding_dimensions(model_name: str) -> int:
    """Look up the embedding vector size registered for a model name.

    The name is lower-cased, stripped of surrounding whitespace and cut at
    the first ``:`` (so ``"name:tag"`` is looked up as ``"name"``) before it
    is matched against the combined OpenAI, Ollama and WatsonX tables.

    Args:
        model_name: Embedding model identifier, optionally with a ``:tag``
            suffix.

    Returns:
        The value stored for the normalized name, or ``VECTOR_DIM`` when the
        name is not present in any table (a warning is logged in that case).
    """
    # Merge the provider tables; on duplicate keys the later table wins
    # (OpenAI, then Ollama, then WatsonX).
    all_models = {}
    for table in (
        OPENAI_EMBEDDING_DIMENSIONS,
        OLLAMA_EMBEDDING_DIMENSIONS,
        WATSONX_EMBEDDING_DIMENSIONS,
    ):
        all_models.update(table)

    # Normalize: case-insensitive, trimmed, and without anything after ':'.
    model_name = model_name.lower().strip().partition(":")[0]

    # Guard clause: unknown models fall back to the default dimension.
    if model_name not in all_models:
        logger.warning(
            f"Unknown embedding model '{model_name}', using default dimensions: {VECTOR_DIM}"
        )
        return VECTOR_DIM

    dimensions = all_models[model_name]
    logger.info(f"Found dimensions for model '{model_name}': {dimensions}")
    return dimensions
|
|
|
|
|
|
def create_dynamic_index_body(embedding_model: str) -> dict:
    """Build the index body (settings + mappings) for an embedding model.

    The only model-dependent value is the ``dimension`` of the
    ``chunk_embedding`` vector field, which is resolved through
    ``get_embedding_dimensions``.

    Args:
        embedding_model: Name of the embedding model the index will store
            vectors for.

    Returns:
        A dict with a ``"settings"`` section (knn enabled, one shard, one
        replica) and a ``"mappings"`` section describing every document
        field.
    """
    # Vector field: size depends on the model, method parameters are fixed.
    vector_field = {
        "type": "knn_vector",
        "dimension": get_embedding_dimensions(embedding_model),
        "method": {
            "name": "disk_ann",
            "engine": "jvector",
            "space_type": "l2",
            "parameters": {"ef_construction": 100, "m": 16},
        },
    }

    # Simple typed fields, listed in the order they appear in the mapping:
    # first those before the vector field, then those after it.
    fields_before_vector = (
        ("document_id", "keyword"),
        ("filename", "keyword"),
        ("mimetype", "keyword"),
        ("page", "integer"),
        ("text", "text"),
    )
    fields_after_vector = (
        ("source_url", "keyword"),
        ("connector_type", "keyword"),
        ("owner", "keyword"),
        ("allowed_users", "keyword"),
        ("allowed_groups", "keyword"),
        ("user_permissions", "object"),
        ("group_permissions", "object"),
        ("created_time", "date"),
        ("modified_time", "date"),
        ("indexed_time", "date"),
        ("metadata", "object"),
    )

    # Assemble the properties so the key order matches the field listing.
    properties = {name: {"type": kind} for name, kind in fields_before_vector}
    properties["chunk_embedding"] = vector_field
    for name, kind in fields_after_vector:
        properties[name] = {"type": kind}

    settings = {
        "index": {"knn": True},
        "number_of_shards": 1,
        "number_of_replicas": 1,
    }

    return {
        "settings": settings,
        "mappings": {"properties": properties},
    }