From ddf802ff54e7f245545dab49a8be202bbb14f36c Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 27 Nov 2025 18:38:00 +0100 Subject: [PATCH] chore: Add migration of unique constraint for SQLite --- ...d2b2_expand_dataset_database_with_json_.py | 201 ++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py b/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py index a9e804f88..e15a98b7c 100644 --- a/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +++ b/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py @@ -18,6 +18,10 @@ down_revision: Union[str, None] = "76625596c5c3" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None +graph_constraint_name = "dataset_database_graph_database_name_key" +vector_constraint_name = "dataset_database_vector_database_name_key" +TABLE_NAME = "dataset_database" + def _get_column(inspector, table, name, schema=None): for col in inspector.get_columns(table, schema=schema): @@ -26,10 +30,154 @@ def _get_column(inspector, table, name, schema=None): return None +def _recreate_table_without_unique_constraint_sqlite(op, insp): + """ + SQLite cannot drop unique constraints on individual columns. We must: + 1. Create a new table without the unique constraints. + 2. Copy data from the old table. + 3. Drop the old table. + 4. Rename the new table. + """ + conn = op.get_bind() + + # Create new table definition (without unique constraints) + op.create_table( + f"{TABLE_NAME}_new", + sa.Column("owner_id", sa.UUID()), + sa.Column("dataset_id", sa.UUID(), primary_key=True, nullable=False), + sa.Column("vector_database_name", sa.String(), nullable=False), + sa.Column("graph_database_name", sa.String(), nullable=False), + sa.Column("vector_database_provider", sa.String(), nullable=False), + sa.Column("graph_database_provider", sa.String(), nullable=False), + sa.Column("vector_database_url", sa.String()), + sa.Column("graph_database_url", sa.String()), + sa.Column("vector_database_key", sa.String()), + sa.Column("graph_database_key", sa.String()), + sa.Column( + "graph_database_connection_info", + sa.JSON(), + nullable=False, + server_default=sa.text("'{}'"), + ), + sa.Column( + "vector_database_connection_info", + sa.JSON(), + nullable=False, + server_default=sa.text("'{}'"), + ), + sa.Column("created_at", sa.DateTime()), + sa.Column("updated_at", sa.DateTime()), + sa.ForeignKeyConstraint(["dataset_id"], ["datasets.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["owner_id"], ["principals.id"], ondelete="CASCADE"), + ) + + # Copy data into new table + conn.execute( + sa.text(f""" + INSERT INTO {TABLE_NAME}_new + SELECT + owner_id, + dataset_id, + vector_database_name, + graph_database_name, + vector_database_provider, + graph_database_provider, + vector_database_url, + graph_database_url, + vector_database_key, + graph_database_key, + COALESCE(graph_database_connection_info, '{{}}'), + COALESCE(vector_database_connection_info, '{{}}'), + created_at, + updated_at + FROM {TABLE_NAME} + """) + ) + + # Drop old table + op.drop_table(TABLE_NAME) + + # Rename new table + op.rename_table(f"{TABLE_NAME}_new", TABLE_NAME) + + +def _recreate_table_with_unique_constraint_sqlite(op, insp): + """ + SQLite cannot drop unique constraints on individual columns. We must: + 1. Create a new table without the unique constraints. + 2. Copy data from the old table. + 3. Drop the old table. + 4. Rename the new table. + """ + conn = op.get_bind() + + # Create new table definition (without unique constraints) + op.create_table( + f"{TABLE_NAME}_new", + sa.Column("owner_id", sa.UUID()), + sa.Column("dataset_id", sa.UUID(), primary_key=True, nullable=False), + sa.Column("vector_database_name", sa.String(), nullable=False, unique=True), + sa.Column("graph_database_name", sa.String(), nullable=False, unique=True), + sa.Column("vector_database_provider", sa.String(), nullable=False), + sa.Column("graph_database_provider", sa.String(), nullable=False), + sa.Column("vector_database_url", sa.String()), + sa.Column("graph_database_url", sa.String()), + sa.Column("vector_database_key", sa.String()), + sa.Column("graph_database_key", sa.String()), + sa.Column( + "graph_database_connection_info", + sa.JSON(), + nullable=False, + server_default=sa.text("'{}'"), + ), + sa.Column( + "vector_database_connection_info", + sa.JSON(), + nullable=False, + server_default=sa.text("'{}'"), + ), + sa.Column("created_at", sa.DateTime()), + sa.Column("updated_at", sa.DateTime()), + sa.ForeignKeyConstraint(["dataset_id"], ["datasets.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["owner_id"], ["principals.id"], ondelete="CASCADE"), + ) + + # Copy data into new table + conn.execute( + sa.text(f""" + INSERT INTO {TABLE_NAME}_new + SELECT + owner_id, + dataset_id, + vector_database_name, + graph_database_name, + vector_database_provider, + graph_database_provider, + vector_database_url, + graph_database_url, + vector_database_key, + graph_database_key, + COALESCE(graph_database_connection_info, '{{}}'), + COALESCE(vector_database_connection_info, '{{}}'), + created_at, + updated_at + FROM {TABLE_NAME} + """) + ) + + # Drop old table + op.drop_table(TABLE_NAME) + + # Rename new table + op.rename_table(f"{TABLE_NAME}_new", TABLE_NAME) + + def upgrade() -> None: conn = op.get_bind() insp = sa.inspect(conn) + unique_constraints = insp.get_unique_constraints(TABLE_NAME) + vector_database_connection_info_column = _get_column( insp, "dataset_database", "vector_database_connection_info" ) @@ -60,7 +208,60 @@ def upgrade() -> None: ), ) + with op.batch_alter_table("dataset_database", schema=None) as batch_op: + # Drop the unique constraint to make unique=False + graph_constraint_to_drop = None + for uc in unique_constraints: + # Check if the constraint covers ONLY the target column + if uc["name"] == graph_constraint_name: + graph_constraint_to_drop = uc["name"] + break + + vector_constraint_to_drop = None + for uc in unique_constraints: + # Check if the constraint covers ONLY the target column + if uc["name"] == vector_constraint_name: + vector_constraint_to_drop = uc["name"] + break + + if ( + vector_constraint_to_drop + and graph_constraint_to_drop + and op.get_context().dialect.name == "postgresql" + ): + # PostgreSQL + batch_op.drop_constraint(graph_constraint_name, type_="unique") + batch_op.drop_constraint(vector_constraint_name, type_="unique") + + if op.get_context().dialect.name == "sqlite": + conn = op.get_bind() + # Fun fact: SQLite has hidden auto indexes for unique constraints that can't be dropped or accessed directly + # So we need to check for them and drop them by recreating the table (altering column also won't work) + result = conn.execute(sa.text("PRAGMA index_list('dataset_database')")) + rows = result.fetchall() + unique_auto_indexes = [row for row in rows if row[3] == "u"] + for row in unique_auto_indexes: + result = conn.execute(sa.text(f"PRAGMA index_info('{row[1]}')")) + index_info = result.fetchall() + if index_info[0][2] == "vector_database_name": + # In case a unique index exists on vector_database_name, drop it and the graph_database_name one + _recreate_table_without_unique_constraint_sqlite(op, insp) + def downgrade() -> None: + conn = op.get_bind() + insp = sa.inspect(conn) + + if op.get_context().dialect.name == "sqlite": + _recreate_table_with_unique_constraint_sqlite(op, insp) + elif op.get_context().dialect.name == "postgresql": + with op.batch_alter_table("dataset_database", schema=None) as batch_op: + # Re-add the unique constraint to return to unique=True + batch_op.create_unique_constraint(graph_constraint_name, ["graph_database_name"]) + + with op.batch_alter_table("dataset_database", schema=None) as batch_op: + # Re-add the unique constraint to return to unique=True + batch_op.create_unique_constraint(vector_constraint_name, ["vector_database_name"]) + op.drop_column("dataset_database", "vector_database_connection_info") op.drop_column("dataset_database", "graph_database_connection_info")