one time migration script for existing indexes
This commit is contained in:
parent
4b29ad73af
commit
23a0efbbda
1 changed files with 104 additions and 0 deletions
104
scripts/migrate_embedding_model_field.py
Normal file
104
scripts/migrate_embedding_model_field.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
Migration script to add the embedding_model field to an existing OpenSearch index.

Run this once to add the field with an explicit keyword mapping.

If the field already exists with a different type (for example text), the
script cannot change it in place: it logs a warning and exits with status 1.
In that case the index has to be reindexed or a different field name used.
"""
|
||||
import asyncio
|
||||
import sys
|
||||
from opensearchpy import AsyncOpenSearch
|
||||
from opensearchpy._async.http_aiohttp import AIOHttpConnection
|
||||
|
||||
# Add the repository's src directory to the import path so that the config
# and utils packages can be imported no matter where the checkout lives.
# Assumes this script sits in <repo>/scripts, next to <repo>/src.
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
||||
|
||||
from config.settings import (
|
||||
OPENSEARCH_HOST,
|
||||
OPENSEARCH_PORT,
|
||||
OPENSEARCH_USERNAME,
|
||||
OPENSEARCH_PASSWORD,
|
||||
INDEX_NAME,
|
||||
)
|
||||
from utils.logging_config import get_logger
|
||||
|
||||
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def add_embedding_model_field():
    """Ensure the index maps ``embedding_model`` as a ``keyword`` field.

    Connects to OpenSearch over HTTPS with basic auth (certificate
    verification is disabled), then:

    1. Checks that ``INDEX_NAME`` exists.
    2. Reads the current mapping. If ``embedding_model`` is already mapped,
       nothing is changed: the result depends only on whether its type is
       ``keyword``.
    3. Otherwise adds the field via ``put_mapping`` and re-reads the mapping
       to verify the result.

    The client is always closed before returning.

    Returns:
        bool: ``True`` if the field is mapped as ``keyword`` when the function
        finishes. ``False`` if the index does not exist, the field exists
        with another type, verification fails, or any exception is raised
        (the exception is logged, not propagated).
    """
    # Admin OpenSearch client
    host_entry = {"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}
    credentials = (OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD)
    client = AsyncOpenSearch(
        hosts=[host_entry],
        connection_class=AIOHttpConnection,
        scheme="https",
        use_ssl=True,
        verify_certs=False,
        ssl_assert_fingerprint=None,
        http_auth=credentials,
        http_compress=True,
    )

    try:
        # Guard: nothing to migrate when the index is missing.
        if not await client.indices.exists(index=INDEX_NAME):
            logger.error(f"Index {INDEX_NAME} does not exist")
            return False

        # Inspect the mapping as it is right now.
        index_mapping = await client.indices.get_mapping(index=INDEX_NAME)
        existing_fields = index_mapping[INDEX_NAME]["mappings"].get("properties", {})

        if "embedding_model" in existing_fields:
            existing_type = existing_fields["embedding_model"].get("type")
            logger.info(f"embedding_model field exists with type: {existing_type}")

            if existing_type != "keyword":
                # The type of an already-mapped field is left untouched;
                # report the mismatch and let the operator decide.
                logger.warning(
                    f"Field exists with wrong type: {existing_type}. "
                    "Cannot change field type on existing field. "
                    "You need to reindex or use a different field name."
                )
                return False

            logger.info("Field is already correct type (keyword)")
            return True

        # Field is absent: add it with an explicit keyword mapping.
        logger.info("Adding embedding_model field as keyword type")
        keyword_mapping = {"properties": {"embedding_model": {"type": "keyword"}}}
        put_result = await client.indices.put_mapping(
            index=INDEX_NAME,
            body=keyword_mapping,
        )
        logger.info(f"Successfully added embedding_model field: {put_result}")

        # Re-read the mapping to confirm the change took effect.
        refreshed_mapping = await client.indices.get_mapping(index=INDEX_NAME)
        refreshed_fields = refreshed_mapping[INDEX_NAME]["mappings"]["properties"]

        if "embedding_model" not in refreshed_fields:
            logger.error("Field was not added successfully")
            return False

        verified_type = refreshed_fields["embedding_model"].get("type")
        logger.info(f"Verified: embedding_model field type is now: {verified_type}")
        return verified_type == "keyword"

    except Exception as exc:
        # Top-level boundary of a one-off script: log and report failure.
        logger.error(f"Error adding embedding_model field: {exc}")
        return False
    finally:
        await client.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit status mirrors the migration result: 0 on success, 1 on failure.
    migrated = asyncio.run(add_embedding_model_field())
    exit_code = 1 if not migrated else 0
    sys.exit(exit_code)
|
||||
Loading…
Add table
Reference in a new issue