chore: Add alembic migration for multi-tenant system

2025-11-04 19:03:56 +01:00 · 2025-11-04 19:03:56 +01:00 · fb102f29a8
commit fb102f29a8
parent a6487cfdc1
1 changed files with 113 additions and 0 deletions
--- a/alembic/versions/c946955da633_multi_tenant_support.py
+++ b/alembic/versions/c946955da633_multi_tenant_support.py
@ -0,0 +1,113 @@
 """Multi Tenant Support
 Revision ID: c946955da633
 Revises: 211ab850ef3d
 Create Date: 2025-11-04 18:11:09.325158
 """
 from typing import Sequence, Union
 from datetime import datetime, timezone
 from uuid import uuid4
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 # `revision` is this migration's own ID and `down_revision` is the migration it
 # builds on; both match the "Revision ID" / "Revises" entries in the module docstring.
 revision: str = "c946955da633"
 down_revision: Union[str, None] = "211ab850ef3d"
 # No branch labels or cross-revision dependencies are declared for this migration.
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def _define_user_table() -> sa.Table:
    """Describe the ``users`` table with only the columns this migration needs.

    The table is bound to a fresh ``MetaData`` so it is independent of any
    application model definitions.

    Returns:
        A ``sa.Table`` named ``users`` exposing ``id`` (primary key, foreign key
        to ``principals.id`` with ``ON DELETE CASCADE``) and a nullable, indexed
        ``tenant_id`` (foreign key to ``tenants.id``).
    """
    id_column = sa.Column(
        "id",
        sa.UUID,
        sa.ForeignKey("principals.id", ondelete="CASCADE"),
        primary_key=True,
        nullable=False,
    )
    tenant_column = sa.Column(
        "tenant_id",
        sa.UUID,
        sa.ForeignKey("tenants.id"),
        index=True,
        nullable=True,
    )
    return sa.Table("users", sa.MetaData(), id_column, tenant_column)
 def _define_dataset_table() -> sa.Table:
    """Describe the ``datasets`` table as this migration expects it.

    Returns:
        A ``sa.Table`` named ``datasets`` bound to a fresh ``MetaData`` with the
        columns ``id``, ``name``, ``created_at``, ``updated_at``, ``owner_id``
        and ``tenant_id``.
    """
    # Note: Cognee model definitions can change over time, so a migration cannot
    #       rely on them; it has to carry its own table definition (or reflect
    #       what is actually in the database).

    def _utc_now():
        # Timezone-aware "now" used for the timestamp column callbacks.
        return datetime.now(timezone.utc)

    columns = [
        sa.Column("id", sa.UUID, primary_key=True, default=uuid4),
        sa.Column("name", sa.Text),
        sa.Column("created_at", sa.DateTime(timezone=True), default=_utc_now),
        sa.Column("updated_at", sa.DateTime(timezone=True), onupdate=_utc_now),
        sa.Column("owner_id", sa.UUID(), sa.ForeignKey("principals.id"), index=True),
        sa.Column("tenant_id", sa.UUID(), sa.ForeignKey("tenants.id"), index=True, nullable=True),
    ]
    return sa.Table("datasets", sa.MetaData(), *columns)
 def _get_column(inspector, table, name, schema=None):
    for col in inspector.get_columns(table, schema=schema):
        if col["name"] == name:
            return col
    return None
 def upgrade() -> None:
    """Add ``datasets.tenant_id`` and backfill it from each dataset owner's tenant.

    The migration does nothing when ``datasets`` already has a ``tenant_id``
    column. Otherwise it:

    1. adds a nullable ``tenant_id`` UUID column to ``datasets``;
    2. sets ``tenant_id`` on every dataset row to the ``tenant_id`` of the
       ``users`` row whose ``id`` equals the dataset's ``owner_id``;
    3. creates the non-unique index ``ix_datasets_tenant_id``.
    """
    conn = op.get_bind()
    insp = sa.inspect(conn)

    # Column already present: nothing to add, backfill or index.
    if _get_column(insp, "datasets", "tenant_id"):
        return

    dataset = _define_dataset_table()
    user = _define_user_table()

    op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True))

    # Correlated subquery: for each dataset row, pick users.tenant_id of its owner.
    tenant_id_from_dataset_owner = (
        sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery()
    )
    # The update is unfiltered; every row is touched, which is fine because the
    # column was created just above and is therefore NULL everywhere.
    backfill = dataset.update().values(tenant_id=tenant_id_from_dataset_owner)

    if op.get_context().dialect.name == "sqlite":
        # NOTE(review): SQLite runs the same statement through a batch context
        # while other dialects use the connection directly — confirm whether
        # this split is still required.
        with op.batch_alter_table("datasets") as batch_op:
            batch_op.execute(backfill)
    else:
        conn.execute(backfill)

    op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"], unique=False)
 def downgrade() -> None:
    """Remove ``datasets.tenant_id`` together with its index.

    The ``ix_datasets_tenant_id`` index is dropped before the column, because
    SQLite refuses to drop a column that is still indexed. The index is only
    dropped when it is actually present on the table.
    """
    index_name = op.f("ix_datasets_tenant_id")
    insp = sa.inspect(op.get_bind())
    existing_indexes = {index["name"] for index in insp.get_indexes("datasets")}

    if index_name in existing_indexes:
        op.drop_index(index_name, table_name="datasets")

    op.drop_column("datasets", "tenant_id")