WIP, non-langflow mode only
This commit is contained in:
parent
102c17d7ec
commit
4b29ad73af
2 changed files with 5 additions and 105 deletions
|
|
@ -1,104 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Migration script to add embedding_model field to existing OpenSearch index.
|
|
||||||
Run this once to fix the field type from text to keyword.
|
|
||||||
"""
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
from opensearchpy import AsyncOpenSearch
|
|
||||||
from opensearchpy._async.http_aiohttp import AIOHttpConnection
|
|
||||||
|
|
||||||
# Add parent directory to path to import config
|
|
||||||
sys.path.insert(0, '/home/tato/Desktop/openrag/src')
|
|
||||||
|
|
||||||
from config.settings import (
|
|
||||||
OPENSEARCH_HOST,
|
|
||||||
OPENSEARCH_PORT,
|
|
||||||
OPENSEARCH_USERNAME,
|
|
||||||
OPENSEARCH_PASSWORD,
|
|
||||||
INDEX_NAME,
|
|
||||||
)
|
|
||||||
from utils.logging_config import get_logger
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def add_embedding_model_field():
|
|
||||||
"""Add embedding_model as keyword field to existing index"""
|
|
||||||
|
|
||||||
# Create admin OpenSearch client
|
|
||||||
client = AsyncOpenSearch(
|
|
||||||
hosts=[{"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}],
|
|
||||||
connection_class=AIOHttpConnection,
|
|
||||||
scheme="https",
|
|
||||||
use_ssl=True,
|
|
||||||
verify_certs=False,
|
|
||||||
ssl_assert_fingerprint=None,
|
|
||||||
http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD),
|
|
||||||
http_compress=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Check if index exists
|
|
||||||
exists = await client.indices.exists(index=INDEX_NAME)
|
|
||||||
if not exists:
|
|
||||||
logger.error(f"Index {INDEX_NAME} does not exist")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Get current mapping
|
|
||||||
mapping = await client.indices.get_mapping(index=INDEX_NAME)
|
|
||||||
current_props = mapping[INDEX_NAME]["mappings"].get("properties", {})
|
|
||||||
|
|
||||||
# Check if embedding_model field exists
|
|
||||||
if "embedding_model" in current_props:
|
|
||||||
current_type = current_props["embedding_model"].get("type")
|
|
||||||
logger.info(f"embedding_model field exists with type: {current_type}")
|
|
||||||
|
|
||||||
if current_type == "keyword":
|
|
||||||
logger.info("Field is already correct type (keyword)")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
f"Field exists with wrong type: {current_type}. "
|
|
||||||
"Cannot change field type on existing field. "
|
|
||||||
"You need to reindex or use a different field name."
|
|
||||||
)
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Add the field as keyword
|
|
||||||
logger.info("Adding embedding_model field as keyword type")
|
|
||||||
new_mapping = {
|
|
||||||
"properties": {
|
|
||||||
"embedding_model": {"type": "keyword"}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
response = await client.indices.put_mapping(
|
|
||||||
index=INDEX_NAME,
|
|
||||||
body=new_mapping
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Successfully added embedding_model field: {response}")
|
|
||||||
|
|
||||||
# Verify the change
|
|
||||||
updated_mapping = await client.indices.get_mapping(index=INDEX_NAME)
|
|
||||||
updated_props = updated_mapping[INDEX_NAME]["mappings"]["properties"]
|
|
||||||
|
|
||||||
if "embedding_model" in updated_props:
|
|
||||||
field_type = updated_props["embedding_model"].get("type")
|
|
||||||
logger.info(f"Verified: embedding_model field type is now: {field_type}")
|
|
||||||
return field_type == "keyword"
|
|
||||||
else:
|
|
||||||
logger.error("Field was not added successfully")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error adding embedding_model field: {e}")
|
|
||||||
return False
|
|
||||||
finally:
|
|
||||||
await client.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
success = asyncio.run(add_embedding_model_field())
|
|
||||||
sys.exit(0 if success else 1)
|
|
||||||
|
|
@ -100,7 +100,7 @@ async def ensure_embedding_field_exists(
|
||||||
dimensions=dimensions,
|
dimensions=dimensions,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Define the field mapping
|
# Define the field mapping for both the vector field and the tracking field
|
||||||
mapping = {
|
mapping = {
|
||||||
"properties": {
|
"properties": {
|
||||||
field_name: {
|
field_name: {
|
||||||
|
|
@ -112,6 +112,10 @@ async def ensure_embedding_field_exists(
|
||||||
"space_type": "l2",
|
"space_type": "l2",
|
||||||
"parameters": {"ef_construction": 100, "m": 16},
|
"parameters": {"ef_construction": 100, "m": 16},
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
# Also ensure the embedding_model tracking field exists as keyword
|
||||||
|
"embedding_model": {
|
||||||
|
"type": "keyword"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue