From 4b29ad73af1bd6b2d013bf4f362114ea8da799a8 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 10 Oct 2025 09:57:45 -0400 Subject: [PATCH] WIP, non-langflow mode only --- src/utils/add_embedding_model_field.py | 104 ------------------------- src/utils/embedding_fields.py | 6 +- 2 files changed, 5 insertions(+), 105 deletions(-) delete mode 100644 src/utils/add_embedding_model_field.py diff --git a/src/utils/add_embedding_model_field.py b/src/utils/add_embedding_model_field.py deleted file mode 100644 index e482c6ca..00000000 --- a/src/utils/add_embedding_model_field.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env python3 -""" -Migration script to add embedding_model field to existing OpenSearch index. -Run this once to fix the field type from text to keyword. -""" -import asyncio -import sys -from opensearchpy import AsyncOpenSearch -from opensearchpy._async.http_aiohttp import AIOHttpConnection - -# Add parent directory to path to import config -sys.path.insert(0, '/home/tato/Desktop/openrag/src') - -from config.settings import ( - OPENSEARCH_HOST, - OPENSEARCH_PORT, - OPENSEARCH_USERNAME, - OPENSEARCH_PASSWORD, - INDEX_NAME, -) -from utils.logging_config import get_logger - -logger = get_logger(__name__) - - -async def add_embedding_model_field(): - """Add embedding_model as keyword field to existing index""" - - # Create admin OpenSearch client - client = AsyncOpenSearch( - hosts=[{"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}], - connection_class=AIOHttpConnection, - scheme="https", - use_ssl=True, - verify_certs=False, - ssl_assert_fingerprint=None, - http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD), - http_compress=True, - ) - - try: - # Check if index exists - exists = await client.indices.exists(index=INDEX_NAME) - if not exists: - logger.error(f"Index {INDEX_NAME} does not exist") - return False - - # Get current mapping - mapping = await client.indices.get_mapping(index=INDEX_NAME) - current_props = mapping[INDEX_NAME]["mappings"].get("properties", {}) - - # Check if embedding_model field exists - if "embedding_model" in current_props: - current_type = current_props["embedding_model"].get("type") - logger.info(f"embedding_model field exists with type: {current_type}") - - if current_type == "keyword": - logger.info("Field is already correct type (keyword)") - return True - else: - logger.warning( - f"Field exists with wrong type: {current_type}. " - "Cannot change field type on existing field. " - "You need to reindex or use a different field name." - ) - return False - - # Add the field as keyword - logger.info("Adding embedding_model field as keyword type") - new_mapping = { - "properties": { - "embedding_model": {"type": "keyword"} - } - } - - response = await client.indices.put_mapping( - index=INDEX_NAME, - body=new_mapping - ) - - logger.info(f"Successfully added embedding_model field: {response}") - - # Verify the change - updated_mapping = await client.indices.get_mapping(index=INDEX_NAME) - updated_props = updated_mapping[INDEX_NAME]["mappings"]["properties"] - - if "embedding_model" in updated_props: - field_type = updated_props["embedding_model"].get("type") - logger.info(f"Verified: embedding_model field type is now: {field_type}") - return field_type == "keyword" - else: - logger.error("Field was not added successfully") - return False - - except Exception as e: - logger.error(f"Error adding embedding_model field: {e}") - return False - finally: - await client.close() - - -if __name__ == "__main__": - success = asyncio.run(add_embedding_model_field()) - sys.exit(0 if success else 1) diff --git a/src/utils/embedding_fields.py b/src/utils/embedding_fields.py index 66663750..0ba4f449 100644 --- a/src/utils/embedding_fields.py +++ b/src/utils/embedding_fields.py @@ -100,7 +100,7 @@ async def ensure_embedding_field_exists( dimensions=dimensions, ) - # Define the field mapping + # Define the field mapping for both the vector field and the tracking field mapping = { "properties": { field_name: { @@ -112,6 +112,10 @@ async def ensure_embedding_field_exists( "space_type": "l2", "parameters": {"ef_construction": 100, "m": 16}, }, + }, + # Also ensure the embedding_model tracking field exists as keyword + "embedding_model": { + "type": "keyword" } } }