From a8ff50ceae262868cb303707f5396abe07cfed38 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 17 Oct 2025 18:09:01 +0200 Subject: [PATCH 01/41] feat: Initial multi-tenancy commit --- cognee/modules/data/methods/create_dataset.py | 5 +-- cognee/modules/data/models/Dataset.py | 1 + cognee/modules/users/methods/create_user.py | 35 +++++-------------- .../modules/users/methods/get_default_user.py | 7 +--- cognee/modules/users/methods/get_user.py | 2 +- .../users/methods/get_user_by_email.py | 2 +- cognee/modules/users/models/Tenant.py | 13 ++++--- cognee/modules/users/models/User.py | 11 +++--- cognee/modules/users/models/UserTenant.py | 12 +++++++ cognee/modules/users/models/__init__.py | 1 + .../get_all_user_permission_datasets.py | 20 +++++------ .../tenants/methods/add_user_to_tenant.py | 25 +++++++++---- .../users/tenants/methods/create_tenant.py | 16 ++++++--- examples/python/permissions_example.py | 4 ++- 14 files changed, 82 insertions(+), 72 deletions(-) create mode 100644 cognee/modules/users/models/UserTenant.py diff --git a/cognee/modules/data/methods/create_dataset.py b/cognee/modules/data/methods/create_dataset.py index c080de0e8..280c9e105 100644 --- a/cognee/modules/data/methods/create_dataset.py +++ b/cognee/modules/data/methods/create_dataset.py @@ -22,8 +22,9 @@ async def create_dataset(dataset_name: str, user: User, session: AsyncSession) - if dataset is None: # Dataset id should be generated based on dataset_name and owner_id/user so multiple users can use the same dataset_name dataset_id = await get_unique_dataset_id(dataset_name=dataset_name, user=user) - dataset = Dataset(id=dataset_id, name=dataset_name, data=[]) - dataset.owner_id = owner_id + dataset = Dataset( + id=dataset_id, name=dataset_name, data=[], owner_id=owner_id, tenant_id=user.tenant_id + ) session.add(dataset) diff --git a/cognee/modules/data/models/Dataset.py b/cognee/modules/data/models/Dataset.py index 797401d5a..00ed4da96 100644 --- a/cognee/modules/data/models/Dataset.py +++ b/cognee/modules/data/models/Dataset.py @@ -18,6 +18,7 @@ class Dataset(Base): updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) owner_id = Column(UUID, index=True) + tenant_id = Column(UUID, index=True, nullable=True) acls = relationship("ACL", back_populates="dataset", cascade="all, delete-orphan") diff --git a/cognee/modules/users/methods/create_user.py b/cognee/modules/users/methods/create_user.py index 1b303bd36..953c70cd6 100644 --- a/cognee/modules/users/methods/create_user.py +++ b/cognee/modules/users/methods/create_user.py @@ -18,7 +18,6 @@ from typing import Optional async def create_user( email: str, password: str, - tenant_id: Optional[str] = None, is_superuser: bool = False, is_active: bool = True, is_verified: bool = False, @@ -30,33 +29,15 @@ async def create_user( async with relational_engine.get_async_session() as session: async with get_user_db_context(session) as user_db: async with get_user_manager_context(user_db) as user_manager: - if tenant_id: - # Check if the tenant already exists - result = await session.execute(select(Tenant).where(Tenant.id == tenant_id)) - tenant = result.scalars().first() - if not tenant: - raise TenantNotFoundError - - user = await user_manager.create( - UserCreate( - email=email, - password=password, - tenant_id=tenant.id, - is_superuser=is_superuser, - is_active=is_active, - is_verified=is_verified, - ) - ) - else: - user = await user_manager.create( - UserCreate( - email=email, - password=password, - is_superuser=is_superuser, - is_active=is_active, - is_verified=is_verified, - ) + user = await user_manager.create( + UserCreate( + email=email, + password=password, + is_superuser=is_superuser, + is_active=is_active, + is_verified=is_verified, ) + ) if auto_login: await session.refresh(user) diff --git a/cognee/modules/users/methods/get_default_user.py b/cognee/modules/users/methods/get_default_user.py index 48073a884..773545f8e 100644 --- a/cognee/modules/users/methods/get_default_user.py +++ b/cognee/modules/users/methods/get_default_user.py @@ -27,12 +27,7 @@ async def get_default_user() -> SimpleNamespace: if user is None: return await create_default_user() - # We return a SimpleNamespace to have the same user type as our SaaS - # SimpleNamespace is just a dictionary which can be accessed through attributes - auth_data = SimpleNamespace( - id=user.id, email=user.email, tenant_id=user.tenant_id, roles=[] - ) - return auth_data + return user except Exception as error: if "principals" in str(error.args): raise DatabaseNotCreatedError() from error diff --git a/cognee/modules/users/methods/get_user.py b/cognee/modules/users/methods/get_user.py index 2678a5a01..a1c87aab7 100644 --- a/cognee/modules/users/methods/get_user.py +++ b/cognee/modules/users/methods/get_user.py @@ -14,7 +14,7 @@ async def get_user(user_id: UUID): user = ( await session.execute( select(User) - .options(selectinload(User.roles), selectinload(User.tenant)) + .options(selectinload(User.roles), selectinload(User.tenants)) .where(User.id == user_id) ) ).scalar() diff --git a/cognee/modules/users/methods/get_user_by_email.py b/cognee/modules/users/methods/get_user_by_email.py index c4bd5b48e..6df989251 100644 --- a/cognee/modules/users/methods/get_user_by_email.py +++ b/cognee/modules/users/methods/get_user_by_email.py @@ -13,7 +13,7 @@ async def get_user_by_email(user_email: str): user = ( await session.execute( select(User) - .options(joinedload(User.roles), joinedload(User.tenant)) + .options(joinedload(User.roles), joinedload(User.tenants)) .where(User.email == user_email) ) ).scalar() diff --git a/cognee/modules/users/models/Tenant.py b/cognee/modules/users/models/Tenant.py index 95023a6ee..b8fa158c5 100644 --- a/cognee/modules/users/models/Tenant.py +++ b/cognee/modules/users/models/Tenant.py @@ -1,7 +1,7 @@ -from sqlalchemy.orm import relationship +from sqlalchemy.orm import relationship, Mapped from sqlalchemy import Column, String, ForeignKey, UUID from .Principal import Principal -from .User import User +from .UserTenant import UserTenant from .Role import Role @@ -13,14 +13,13 @@ class Tenant(Principal): owner_id = Column(UUID, index=True) - # One-to-Many relationship with User; specify the join via User.tenant_id - users = relationship( + users: Mapped[list["User"]] = relationship( # noqa: F821 "User", - back_populates="tenant", - foreign_keys=lambda: [User.tenant_id], + secondary=UserTenant.__tablename__, + back_populates="tenants", ) - # One-to-Many relationship with Role (if needed; similar fix) + # One-to-Many relationship with Role roles = relationship( "Role", back_populates="tenant", diff --git a/cognee/modules/users/models/User.py b/cognee/modules/users/models/User.py index 8972a5932..a98abd3bc 100644 --- a/cognee/modules/users/models/User.py +++ b/cognee/modules/users/models/User.py @@ -6,8 +6,10 @@ from sqlalchemy import ForeignKey, Column, UUID from sqlalchemy.orm import relationship, Mapped from .Principal import Principal +from .UserTenant import UserTenant from .UserRole import UserRole from .Role import Role +from .Tenant import Tenant class User(SQLAlchemyBaseUserTableUUID, Principal): @@ -15,7 +17,7 @@ class User(SQLAlchemyBaseUserTableUUID, Principal): id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), primary_key=True) - # Foreign key to Tenant (Many-to-One relationship) + # Foreign key to current Tenant (Many-to-One relationship) tenant_id = Column(UUID, ForeignKey("tenants.id")) # Many-to-Many Relationship with Roles @@ -25,11 +27,11 @@ class User(SQLAlchemyBaseUserTableUUID, Principal): back_populates="users", ) - # Relationship to Tenant - tenant = relationship( + # Many-to-Many Relationship with Tenants user is a part of + tenants: Mapped[list["Tenant"]] = relationship( "Tenant", + secondary=UserTenant.__tablename__, back_populates="users", - foreign_keys=[tenant_id], ) # ACL Relationship (One-to-Many) @@ -46,7 +48,6 @@ class UserRead(schemas.BaseUser[uuid_UUID]): class UserCreate(schemas.BaseUserCreate): - tenant_id: Optional[uuid_UUID] = None is_verified: bool = True diff --git a/cognee/modules/users/models/UserTenant.py b/cognee/modules/users/models/UserTenant.py new file mode 100644 index 000000000..bfb852aa5 --- /dev/null +++ b/cognee/modules/users/models/UserTenant.py @@ -0,0 +1,12 @@ +from datetime import datetime, timezone +from sqlalchemy import Column, ForeignKey, DateTime, UUID +from cognee.infrastructure.databases.relational import Base + + +class UserTenant(Base): + __tablename__ = "user_tenants" + + created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) + + user_id = Column(UUID, ForeignKey("users.id"), primary_key=True) + tenant_id = Column(UUID, ForeignKey("tenants.id"), primary_key=True) diff --git a/cognee/modules/users/models/__init__.py b/cognee/modules/users/models/__init__.py index ba2f40e49..5114cc45a 100644 --- a/cognee/modules/users/models/__init__.py +++ b/cognee/modules/users/models/__init__.py @@ -1,6 +1,7 @@ from .User import User from .Role import Role from .UserRole import UserRole +from .UserTenant import UserTenant from .DatasetDatabase import DatasetDatabase from .RoleDefaultPermissions import RoleDefaultPermissions from .UserDefaultPermissions import UserDefaultPermissions diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index 1185dd7ad..a4f538259 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -1,11 +1,8 @@ -from types import SimpleNamespace - from cognee.shared.logging_utils import get_logger from ...models.User import User from cognee.modules.data.models.Dataset import Dataset from cognee.modules.users.permissions.methods import get_principal_datasets -from cognee.modules.users.permissions.methods import get_role, get_tenant logger = get_logger() @@ -25,17 +22,15 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # Get all datasets User has explicit access to datasets.extend(await get_principal_datasets(user, permission_type)) - if user.tenant_id: - # Get all datasets all tenants have access to - tenant = await get_tenant(user.tenant_id) + # Get all tenants user is a part of + tenants = await user.awaitable_attrs.tenants + + for tenant in tenants: + # Get all datasets all tenant members have access to datasets.extend(await get_principal_datasets(tenant, permission_type)) - # Get all datasets Users roles have access to - if isinstance(user, SimpleNamespace): - # If simple namespace use roles defined in user - roles = user.roles - else: - roles = await user.awaitable_attrs.roles + # Get all datasets accessible by roles user is a part of + roles = await user.awaitable_attrs.roles for role in roles: datasets.extend(await get_principal_datasets(role, permission_type)) @@ -45,4 +40,5 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. unique.setdefault(dataset.id, dataset) + # TODO: Add filtering out of datasets that aren't currently selected tenant of user return list(unique.values()) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index 1374067a7..b9f5898d0 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -1,8 +1,11 @@ +from typing import Optional from uuid import UUID from sqlalchemy.exc import IntegrityError +from sqlalchemy import insert from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.models.UserTenant import UserTenant from cognee.modules.users.methods import get_user from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import ( @@ -12,14 +15,19 @@ from cognee.modules.users.exceptions import ( ) -async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID): +async def add_user_to_tenant( + user_id: UUID, tenant_id: UUID, owner_id: UUID, set_active_tenant: Optional[bool] = True +): """ Add a user with the given id to the tenant with the given id. This can only be successful if the request owner with the given id is the tenant owner. + + If set_active_tenant is true it will automatically set the users active tenant to provided tenant. Args: user_id: Id of the user. tenant_id: Id of the tenant. owner_id: Id of the request owner. + set_active_tenant: If set_active_tenant is true it will automatically set the users active tenant to provided tenant. Returns: None @@ -41,12 +49,17 @@ async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID): ) try: - if user.tenant_id is None: + try: + # Add association directly to the association table + create_user_tenant_statement = insert(UserTenant).values( + user_id=user_id, tenant_id=tenant_id + ) + await session.execute(create_user_tenant_statement) + except IntegrityError: + raise EntityAlreadyExistsError(message="User is already part of group.") + + if set_active_tenant: user.tenant_id = tenant_id - elif user.tenant_id == tenant_id: - return - else: - raise IntegrityError await session.merge(user) await session.commit() diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py index bfd23e08f..665e3cc18 100644 --- a/cognee/modules/users/tenants/methods/create_tenant.py +++ b/cognee/modules/users/tenants/methods/create_tenant.py @@ -1,6 +1,8 @@ from uuid import UUID +from sqlalchemy import insert from sqlalchemy.exc import IntegrityError +from cognee.modules.users.models.UserTenant import UserTenant from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models import Tenant @@ -22,16 +24,22 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: async with db_engine.get_async_session() as session: try: user = await get_user(user_id) - if user.tenant_id: - raise EntityAlreadyExistsError( - message="User already has a tenant. New tenant cannot be created." - ) tenant = Tenant(name=tenant_name, owner_id=user_id) session.add(tenant) await session.flush() user.tenant_id = tenant.id + + try: + # Add association directly to the association table + create_user_tenant_statement = insert(UserTenant).values( + user_id=user_id, tenant_id=tenant.id + ) + await session.execute(create_user_tenant_statement) + except IntegrityError: + raise EntityAlreadyExistsError(message="User is already part of group.") + await session.merge(user) await session.commit() return tenant.id diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 4f51b660f..7c140845c 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -150,7 +150,9 @@ async def main(): # To add a user to a role he must be part of the same tenant/organization print("\nOperation started as user_2 to add user_3 to CogneeLab tenant/organization") - await add_user_to_tenant(user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id) + await add_user_to_tenant( + user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id, set_active_tenant=True + ) print( "\nOperation started by user_2, as tenant owner, to add user_3 to Researcher role inside the tenant/organization" From 0c4e3e1f5295746db287eff5101d60c4cf89c1df Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 20:13:22 +0200 Subject: [PATCH 02/41] fix: Load tenants to default user --- cognee/modules/users/methods/get_default_user.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/modules/users/methods/get_default_user.py b/cognee/modules/users/methods/get_default_user.py index 773545f8e..a48bd8928 100644 --- a/cognee/modules/users/methods/get_default_user.py +++ b/cognee/modules/users/methods/get_default_user.py @@ -18,7 +18,9 @@ async def get_default_user() -> SimpleNamespace: try: async with db_engine.get_async_session() as session: query = ( - select(User).options(selectinload(User.roles)).where(User.email == default_email) + select(User) + .options(selectinload(User.roles), selectinload(User.tenants)) + .where(User.email == default_email) ) result = await session.execute(query) From 12785e31ea327135a4e1968c90f1cb1e5891fad3 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:11:14 +0200 Subject: [PATCH 03/41] fix: Resolve issue with adding user to tenants --- .../tenants/methods/add_user_to_tenant.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index b9f5898d0..dabab6b6b 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -48,22 +48,18 @@ async def add_user_to_tenant( message="Only tenant owner can add other users to organization." ) - try: - try: - # Add association directly to the association table - create_user_tenant_statement = insert(UserTenant).values( - user_id=user_id, tenant_id=tenant_id - ) - await session.execute(create_user_tenant_statement) - except IntegrityError: - raise EntityAlreadyExistsError(message="User is already part of group.") - - if set_active_tenant: - user.tenant_id = tenant_id - + if set_active_tenant: + user.tenant_id = tenant_id await session.merge(user) await session.commit() - except IntegrityError: - raise EntityAlreadyExistsError( - message="User is already part of a tenant. Only one tenant can be assigned to user." + + try: + # Add association directly to the association table + create_user_tenant_statement = insert(UserTenant).values( + user_id=user_id, tenant_id=tenant_id ) + await session.execute(create_user_tenant_statement) + await session.commit() + + except IntegrityError: + raise EntityAlreadyExistsError(message="User is already part of group.") From 13f0423a55720debc1d04fbe9f005855c008ae53 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:35:50 +0200 Subject: [PATCH 04/41] refactor: Add better TODO message --- .../permissions/methods/get_all_user_permission_datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index a4f538259..ff0f52d27 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -40,5 +40,6 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. unique.setdefault(dataset.id, dataset) - # TODO: Add filtering out of datasets that aren't currently selected tenant of user + # TODO: Add filtering out of datasets that aren't currently selected tenant of user (currently selected tenant is the tenant_id value in the User model) + # TODO: Add endpoint/method to select current Tenant for a user (This UUID value should be stored in tenant_id of User model) return list(unique.values()) From d6bb95e3798984bee76a0e5cd92308097f153649 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:57:39 +0200 Subject: [PATCH 05/41] fix: load tenants and roles when creating user --- cognee/modules/users/methods/create_user.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cognee/modules/users/methods/create_user.py b/cognee/modules/users/methods/create_user.py index 953c70cd6..ef325fb6f 100644 --- a/cognee/modules/users/methods/create_user.py +++ b/cognee/modules/users/methods/create_user.py @@ -42,6 +42,10 @@ async def create_user( if auto_login: await session.refresh(user) + # Update tenants and roles information for User object + _ = await user.awaitable_attrs.tenants + _ = await user.awaitable_attrs.roles + return user except UserAlreadyExists as error: print(f"User {email} already exists") From 4f874deace3b55072bf97c35b45e158d03c5d844 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 23:50:17 +0200 Subject: [PATCH 06/41] feat: Add tenant select method/endpoint for users --- .../routers/get_permissions_router.py | 32 ++++++++++++ .../get_all_user_permission_datasets.py | 1 - .../modules/users/tenants/methods/__init__.py | 1 + .../users/tenants/methods/create_tenant.py | 7 +-- .../users/tenants/methods/select_tenant.py | 50 +++++++++++++++++++ 5 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 cognee/modules/users/tenants/methods/select_tenant.py diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 637293268..7959415da 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -220,4 +220,36 @@ def get_permissions_router() -> APIRouter: status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)} ) + @permissions_router.post("/tenants/{tenant_id}") + async def select_tenant(tenant_id: UUID, user: User = Depends(get_authenticated_user)): + """ + Select current tenant. + + This endpoint selects a tenant with the specified UUID. Tenants are used + to organize users and resources in multi-tenant environments, providing + isolation and access control between different groups or organizations. + + ## Request Parameters + - **tenant_id** (UUID): UUID of the tenant to create + + ## Response + Returns a success message indicating the tenant was created. + """ + send_telemetry( + "Permissions API Endpoint Invoked", + user.id, + additional_properties={ + "endpoint": f"POST /v1/permissions/tenants/{str(tenant_id)}", + "tenant_id": tenant_id, + }, + ) + + from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method + + await select_tenant_method(user_id=user.id, tenant_id=tenant_id) + + return JSONResponse( + status_code=200, content={"message": "Tenant selected.", "tenant_id": str(tenant_id)} + ) + return permissions_router diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index ff0f52d27..e5dbb0e4b 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -41,5 +41,4 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> unique.setdefault(dataset.id, dataset) # TODO: Add filtering out of datasets that aren't currently selected tenant of user (currently selected tenant is the tenant_id value in the User model) - # TODO: Add endpoint/method to select current Tenant for a user (This UUID value should be stored in tenant_id of User model) return list(unique.values()) diff --git a/cognee/modules/users/tenants/methods/__init__.py b/cognee/modules/users/tenants/methods/__init__.py index 9a052e9c6..39e2b31bb 100644 --- a/cognee/modules/users/tenants/methods/__init__.py +++ b/cognee/modules/users/tenants/methods/__init__.py @@ -1,2 +1,3 @@ from .create_tenant import create_tenant from .add_user_to_tenant import add_user_to_tenant +from .select_tenant import select_tenant diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py index 665e3cc18..60e10db5c 100644 --- a/cognee/modules/users/tenants/methods/create_tenant.py +++ b/cognee/modules/users/tenants/methods/create_tenant.py @@ -30,6 +30,8 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: await session.flush() user.tenant_id = tenant.id + await session.merge(user) + await session.commit() try: # Add association directly to the association table @@ -37,11 +39,10 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: user_id=user_id, tenant_id=tenant.id ) await session.execute(create_user_tenant_statement) + await session.commit() except IntegrityError: - raise EntityAlreadyExistsError(message="User is already part of group.") + raise EntityAlreadyExistsError(message="User is already part of tenant.") - await session.merge(user) - await session.commit() return tenant.id except IntegrityError as e: raise EntityAlreadyExistsError(message="Tenant already exists.") from e diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py new file mode 100644 index 000000000..709e46bf2 --- /dev/null +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -0,0 +1,50 @@ +from uuid import UUID + +import sqlalchemy.exc +from sqlalchemy import select + +from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.models.UserTenant import UserTenant +from cognee.modules.users.methods import get_user +from cognee.modules.users.permissions.methods import get_tenant +from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError + + +async def select_tenant(user_id: UUID, tenant_id: UUID): + """ + Set the users active tenant to provided tenant. + Args: + user_id: Id of the user. + tenant_id: Id of the tenant. + + Returns: + None + + """ + db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + user = await get_user(user_id) + tenant = await get_tenant(tenant_id) + + if not user: + raise UserNotFoundError + elif not tenant: + raise TenantNotFoundError + + # Check if User is part of Tenant + result = await session.execute( + select(UserTenant) + .where(UserTenant.user_id == user_id) + .where(UserTenant.tenant_id == tenant_id) + ) + + try: + result = result.scalar_one() + except sqlalchemy.exc.NoResultFound as e: + raise TenantNotFoundError("User Tenant relationship not found.") from e + + if result: + # If user is part of tenant update current tenant of user + user.tenant_id = tenant_id + await session.merge(user) + await session.commit() From 6934692e1b7646a493f47ec6a189e041db6cb14a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 20 Oct 2025 15:07:13 +0200 Subject: [PATCH 07/41] refactor: Enable selection of default single user tenant --- .../routers/get_permissions_router.py | 24 ++++++++++++------- .../users/tenants/methods/select_tenant.py | 13 +++++++++- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 7959415da..eeea9b653 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -1,14 +1,19 @@ from uuid import UUID -from typing import List +from typing import List, Union from fastapi import APIRouter, Depends from fastapi.responses import JSONResponse from cognee.modules.users.models import User +from cognee.api.DTO import InDTO from cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry +class SelectTenantDTO(InDTO): + tenant_id: UUID | None = None + + def get_permissions_router() -> APIRouter: permissions_router = APIRouter() @@ -220,8 +225,8 @@ def get_permissions_router() -> APIRouter: status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)} ) - @permissions_router.post("/tenants/{tenant_id}") - async def select_tenant(tenant_id: UUID, user: User = Depends(get_authenticated_user)): + @permissions_router.post("/tenants/select") + async def select_tenant(payload: SelectTenantDTO, user: User = Depends(get_authenticated_user)): """ Select current tenant. @@ -229,8 +234,10 @@ def get_permissions_router() -> APIRouter: to organize users and resources in multi-tenant environments, providing isolation and access control between different groups or organizations. + Sending a null/None value as tenant_id selects his default single user tenant + ## Request Parameters - - **tenant_id** (UUID): UUID of the tenant to create + - **tenant_id** (Union[UUID, None]): UUID of the tenant to select, If null/None is provided use the default single user tenant ## Response Returns a success message indicating the tenant was created. @@ -239,17 +246,18 @@ def get_permissions_router() -> APIRouter: "Permissions API Endpoint Invoked", user.id, additional_properties={ - "endpoint": f"POST /v1/permissions/tenants/{str(tenant_id)}", - "tenant_id": tenant_id, + "endpoint": f"POST /v1/permissions/tenants/{str(payload.tenant_id)}", + "tenant_id": str(payload.tenant_id), }, ) from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method - await select_tenant_method(user_id=user.id, tenant_id=tenant_id) + await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id) return JSONResponse( - status_code=200, content={"message": "Tenant selected.", "tenant_id": str(tenant_id)} + status_code=200, + content={"message": "Tenant selected.", "tenant_id": str(payload.tenant_id)}, ) return permissions_router diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index 709e46bf2..732b24858 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -1,4 +1,5 @@ from uuid import UUID +from typing import Union import sqlalchemy.exc from sqlalchemy import select @@ -10,9 +11,11 @@ from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user_id: UUID, tenant_id: UUID): +async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): """ Set the users active tenant to provided tenant. + + If None tenant_id is provided set current Tenant to the default single user-tenant Args: user_id: Id of the user. tenant_id: Id of the tenant. @@ -24,6 +27,14 @@ async def select_tenant(user_id: UUID, tenant_id: UUID): db_engine = get_relational_engine() async with db_engine.get_async_session() as session: user = await get_user(user_id) + + if tenant_id is None: + # If no tenant_id is provided set current Tenant to the single user-tenant + user.tenant_id = None + await session.merge(user) + await session.commit() + return + tenant = await get_tenant(tenant_id) if not user: From e3b707a0c242fb7268d56d2b485990f120fe0462 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 12:20:17 +0100 Subject: [PATCH 08/41] refactor: Change variable names, add setting of current tenant to be optional for tenant creation --- .../users/tenants/methods/add_user_to_tenant.py | 8 ++++---- .../modules/users/tenants/methods/create_tenant.py | 13 +++++++++---- .../modules/users/tenants/methods/select_tenant.py | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index dabab6b6b..edadfe66b 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -16,18 +16,18 @@ from cognee.modules.users.exceptions import ( async def add_user_to_tenant( - user_id: UUID, tenant_id: UUID, owner_id: UUID, set_active_tenant: Optional[bool] = True + user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = True ): """ Add a user with the given id to the tenant with the given id. This can only be successful if the request owner with the given id is the tenant owner. - If set_active_tenant is true it will automatically set the users active tenant to provided tenant. + If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant. Args: user_id: Id of the user. tenant_id: Id of the tenant. owner_id: Id of the request owner. - set_active_tenant: If set_active_tenant is true it will automatically set the users active tenant to provided tenant. + set_as_active_tenant: If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant. Returns: None @@ -48,7 +48,7 @@ async def add_user_to_tenant( message="Only tenant owner can add other users to organization." ) - if set_active_tenant: + if set_as_active_tenant: user.tenant_id = tenant_id await session.merge(user) await session.commit() diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py index 60e10db5c..32baa05fd 100644 --- a/cognee/modules/users/tenants/methods/create_tenant.py +++ b/cognee/modules/users/tenants/methods/create_tenant.py @@ -1,6 +1,7 @@ from uuid import UUID from sqlalchemy import insert from sqlalchemy.exc import IntegrityError +from typing import Optional from cognee.modules.users.models.UserTenant import UserTenant from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError @@ -9,13 +10,16 @@ from cognee.modules.users.models import Tenant from cognee.modules.users.methods import get_user -async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: +async def create_tenant( + tenant_name: str, user_id: UUID, set_as_active_tenant: Optional[bool] = True +) -> UUID: """ Create a new tenant with the given name, for the user with the given id. This user is the owner of the tenant. Args: tenant_name: Name of the new tenant. user_id: Id of the user. + set_as_active_tenant: If true, set the newly created tenant as the active tenant for the user. Returns: None @@ -29,9 +33,10 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: session.add(tenant) await session.flush() - user.tenant_id = tenant.id - await session.merge(user) - await session.commit() + if set_as_active_tenant: + user.tenant_id = tenant.id + await session.merge(user) + await session.commit() try: # Add association directly to the association table diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index 732b24858..6e72fea2f 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -52,7 +52,7 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): try: result = result.scalar_one() except sqlalchemy.exc.NoResultFound as e: - raise TenantNotFoundError("User Tenant relationship not found.") from e + raise TenantNotFoundError("User is not part of the tenant.") from e if result: # If user is part of tenant update current tenant of user From b0f85c9e990f8dd20e6fce8dcd6f29c4050050e8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:01:10 +0100 Subject: [PATCH 09/41] feat: add legacy and modern data_id calculating --- cognee/modules/data/methods/__init__.py | 1 + .../data/methods/get_unique_data_id.py | 71 +++++++++++++++++++ cognee/modules/ingestion/identify.py | 10 +-- 3 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 cognee/modules/data/methods/get_unique_data_id.py diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py index 83913085c..7936a9afd 100644 --- a/cognee/modules/data/methods/__init__.py +++ b/cognee/modules/data/methods/__init__.py @@ -10,6 +10,7 @@ from .get_authorized_dataset import get_authorized_dataset from .get_authorized_dataset_by_name import get_authorized_dataset_by_name from .get_data import get_data from .get_unique_dataset_id import get_unique_dataset_id +from .get_unique_data_id import get_unique_data_id from .get_authorized_existing_datasets import get_authorized_existing_datasets from .get_dataset_ids import get_dataset_ids diff --git a/cognee/modules/data/methods/get_unique_data_id.py b/cognee/modules/data/methods/get_unique_data_id.py new file mode 100644 index 000000000..3fc184ce4 --- /dev/null +++ b/cognee/modules/data/methods/get_unique_data_id.py @@ -0,0 +1,71 @@ +from uuid import uuid5, NAMESPACE_OID, UUID +from typing import Optional +from sqlalchemy import select + +from cognee.modules.data.models.Data import Data +from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.models import User + + +async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Optional[UUID]) -> UUID: + """ + Function returns a unique UUID for data based on data identifier, user id and tenant id. + If data with legacy ID exists, return that ID to maintain compatibility. + + Args: + data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.) + user: User object adding the data + tenant_id: UUID of the tenant for which data is being added + + Returns: + UUID: Unique identifier for the data + """ + + def _get_deprecated_unique_data_id(data_identifier: str, user: User) -> UUID: + """ + Deprecated function, returns a unique UUID for data based on data identifier and user id. + Needed to support legacy data without tenant information. + Args: + data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.) + user: User object adding the data + + Returns: + UUID: Unique identifier for the data + """ + # return UUID hash of file contents + owner id + tenant_id + return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}") + + def _get_modern_unique_data_id(data_identifier: str, user: User, tenant_id: UUID) -> UUID: + """ + Function returns a unique UUID for data based on data identifier, user id and tenant id. + Args: + data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.) + user: User object adding the data + tenant_id: UUID of the tenant for which data is being added + + Returns: + UUID: Unique identifier for the data + """ + # return UUID hash of file contents + owner id + tenant_id + return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}{str(tenant_id)}") + + # Get all possible data_id values + data_id = { + "modern_data_id": _get_modern_unique_data_id( + data_identifier=data_identifier, user=user, tenant_id=tenant_id + ), + "legacy_data_id": _get_deprecated_unique_data_id( + data_identifier=data_identifier, user=user + ), + } + + # Check if data item with legacy_data_id exists, if so use that one, else use modern_data_id + db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + legacy_data_point = ( + await session.execute(select(Data).filter(Data.id == data_id["legacy_data_id"])) + ).scalar_one_or_none() + + if not legacy_data_point: + return data_id["modern_data_id"] + return data_id["legacy_data_id"] diff --git a/cognee/modules/ingestion/identify.py b/cognee/modules/ingestion/identify.py index 977ff3f0b..5a0fe379e 100644 --- a/cognee/modules/ingestion/identify.py +++ b/cognee/modules/ingestion/identify.py @@ -1,11 +1,13 @@ -from uuid import uuid5, NAMESPACE_OID +from uuid import UUID from .data_types import IngestionData from cognee.modules.users.models import User +from cognee.modules.data.methods import get_unique_data_id -def identify(data: IngestionData, user: User) -> str: +async def identify(data: IngestionData, user: User) -> UUID: data_content_hash: str = data.get_identifier() - # return UUID hash of file contents + owner id - return uuid5(NAMESPACE_OID, f"{data_content_hash}{user.id}") + return await get_unique_data_id( + data_identifier=data_content_hash, user=user, tenant_id=user.tenant_id + ) From ff388179fb38cd82a59b3a45ea3f343b16c56c86 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:11:57 +0100 Subject: [PATCH 10/41] feat: Add dataset_id calculation that handles legacy dataset_id --- .../data/methods/get_unique_data_id.py | 11 ++- .../data/methods/get_unique_dataset_id.py | 70 +++++++++++++++++-- cognee/modules/ingestion/identify.py | 4 +- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/cognee/modules/data/methods/get_unique_data_id.py b/cognee/modules/data/methods/get_unique_data_id.py index 3fc184ce4..877b5930c 100644 --- a/cognee/modules/data/methods/get_unique_data_id.py +++ b/cognee/modules/data/methods/get_unique_data_id.py @@ -1,5 +1,4 @@ from uuid import uuid5, NAMESPACE_OID, UUID -from typing import Optional from sqlalchemy import select from cognee.modules.data.models.Data import Data @@ -7,7 +6,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models import User -async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Optional[UUID]) -> UUID: +async def get_unique_data_id(data_identifier: str, user: User) -> UUID: """ Function returns a unique UUID for data based on data identifier, user id and tenant id. If data with legacy ID exists, return that ID to maintain compatibility. @@ -35,7 +34,7 @@ async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Option # return UUID hash of file contents + owner id + tenant_id return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}") - def _get_modern_unique_data_id(data_identifier: str, user: User, tenant_id: UUID) -> UUID: + def _get_modern_unique_data_id(data_identifier: str, user: User) -> UUID: """ Function returns a unique UUID for data based on data identifier, user id and tenant id. Args: @@ -47,13 +46,11 @@ async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Option UUID: Unique identifier for the data """ # return UUID hash of file contents + owner id + tenant_id - return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}{str(tenant_id)}") + return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}{str(user.tenant_id)}") # Get all possible data_id values data_id = { - "modern_data_id": _get_modern_unique_data_id( - data_identifier=data_identifier, user=user, tenant_id=tenant_id - ), + "modern_data_id": _get_modern_unique_data_id(data_identifier=data_identifier, user=user), "legacy_data_id": _get_deprecated_unique_data_id( data_identifier=data_identifier, user=user ), diff --git a/cognee/modules/data/methods/get_unique_dataset_id.py b/cognee/modules/data/methods/get_unique_dataset_id.py index 2caf5fb55..274f24d1a 100644 --- a/cognee/modules/data/methods/get_unique_dataset_id.py +++ b/cognee/modules/data/methods/get_unique_dataset_id.py @@ -1,9 +1,71 @@ from uuid import UUID, uuid5, NAMESPACE_OID -from cognee.modules.users.models import User from typing import Union +from sqlalchemy import select + +from cognee.modules.data.models.Dataset import Dataset +from cognee.modules.users.models import User +from cognee.infrastructure.databases.relational import get_relational_engine async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID: - if isinstance(dataset_name, UUID): - return dataset_name - return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}") + """ + Function returns a unique UUID for dataset based on dataset name, user id and tenant id. + If dataset with legacy ID exists, return that ID to maintain compatibility. + + Args: + dataset_name: string representing the dataset name + user: User object adding the dataset + tenant_id: UUID of the tenant for which dataset is being added + + Returns: + UUID: Unique identifier for the dataset + """ + + def _get_legacy_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID: + """ + Legacy function, returns a unique UUID for dataset based on dataset name and user id. + Needed to support legacy datasets without tenant information. + Args: + dataset_name: string representing the dataset name + user: Current User object adding the dataset + + Returns: + UUID: Unique identifier for the dataset + """ + if isinstance(dataset_name, UUID): + return dataset_name + return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}") + + def _get_modern_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID: + """ + Returns a unique UUID for dataset based on dataset name, user id and tenant_id. + Args: + dataset_name: string representing the dataset name + user: Current User object adding the dataset + tenant_id: UUID of the tenant for which dataset is being added + + Returns: + UUID: Unique identifier for the dataset + """ + if isinstance(dataset_name, UUID): + return dataset_name + return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}{str(user.tenant_id)}") + + # Get all possible dataset_id values + dataset_id = { + "modern_dataset_id": _get_modern_unique_dataset_id(dataset_name=dataset_name, user=user), + "legacy_dataset_id": _get_legacy_unique_dataset_id(dataset_name=dataset_name, user=user), + } + + # Check if dataset with legacy_dataset_id exists, if so use that one, else use modern_dataset_id + db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + legacy_dataset = ( + await session.execute( + select(Dataset).filter(Dataset.id == dataset_id["legacy_data_id"]) + ) + ).scalar_one_or_none() + + if not legacy_dataset: + return dataset_id["modern_dataset_id"] + return dataset_id["legacy_dataset_id"] diff --git a/cognee/modules/ingestion/identify.py b/cognee/modules/ingestion/identify.py index 5a0fe379e..640fce4a2 100644 --- a/cognee/modules/ingestion/identify.py +++ b/cognee/modules/ingestion/identify.py @@ -8,6 +8,4 @@ from cognee.modules.data.methods import get_unique_data_id async def identify(data: IngestionData, user: User) -> UUID: data_content_hash: str = data.get_identifier() - return await get_unique_data_id( - data_identifier=data_content_hash, user=user, tenant_id=user.tenant_id - ) + return await get_unique_data_id(data_identifier=data_content_hash, user=user) From ac257dca1db4123cf97abacf32e1ecd85dab9afd Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:13:42 +0100 Subject: [PATCH 11/41] refactor: Account for async change for identify function --- cognee/modules/pipelines/operations/run_tasks_data_item.py | 2 +- cognee/tasks/ingestion/ingest_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/modules/pipelines/operations/run_tasks_data_item.py b/cognee/modules/pipelines/operations/run_tasks_data_item.py index 152e72d7f..2cc449df6 100644 --- a/cognee/modules/pipelines/operations/run_tasks_data_item.py +++ b/cognee/modules/pipelines/operations/run_tasks_data_item.py @@ -69,7 +69,7 @@ async def run_tasks_data_item_incremental( async with open_data_file(file_path) as file: classified_data = ingestion.classify(file) # data_id is the hash of file contents + owner id to avoid duplicate data - data_id = ingestion.identify(classified_data, user) + data_id = await ingestion.identify(classified_data, user) else: # If data was already processed by Cognee get data id data_id = data_item.id diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 0572d0f1e..5987f38d5 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -99,7 +99,7 @@ async def ingest_data( # data_id is the hash of original file contents + owner id to avoid duplicate data - data_id = ingestion.identify(classified_data, user) + data_id = await ingestion.identify(classified_data, user) original_file_metadata = classified_data.get_metadata() # Find metadata from Cognee data storage text file From ea675f29d65dcf354d8999106ff3b8db3a8149f2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:15:49 +0100 Subject: [PATCH 12/41] fix: Resolve typo in accessing dictionary for dataset_id --- cognee/modules/data/methods/get_unique_dataset_id.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/data/methods/get_unique_dataset_id.py b/cognee/modules/data/methods/get_unique_dataset_id.py index 274f24d1a..2b765ec78 100644 --- a/cognee/modules/data/methods/get_unique_dataset_id.py +++ b/cognee/modules/data/methods/get_unique_dataset_id.py @@ -62,7 +62,7 @@ async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> U async with db_engine.get_async_session() as session: legacy_dataset = ( await session.execute( - select(Dataset).filter(Dataset.id == dataset_id["legacy_data_id"]) + select(Dataset).filter(Dataset.id == dataset_id["legacy_dataset_id"]) ) ).scalar_one_or_none() From 9d771acc2427592f40caf0e9727c8e8151c5af64 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:35:50 +0100 Subject: [PATCH 13/41] refactor: filter out search results --- .../methods/get_all_user_permission_datasets.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index e5dbb0e4b..a8cb96fbb 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -26,13 +26,16 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> tenants = await user.awaitable_attrs.tenants for tenant in tenants: - # Get all datasets all tenant members have access to - datasets.extend(await get_principal_datasets(tenant, permission_type)) + # If tenant is the user's selected tenant add datasets that users roles in the tenant and the tenant itself + # have access for + if tenant.id == user.tenant_id: + # Get all datasets all tenant members have access to + datasets.extend(await get_principal_datasets(tenant, permission_type)) - # Get all datasets accessible by roles user is a part of - roles = await user.awaitable_attrs.roles - for role in roles: - datasets.extend(await get_principal_datasets(role, permission_type)) + # Get all datasets accessible by roles user is a part of + roles = await user.awaitable_attrs.roles + for role in roles: + datasets.extend(await get_principal_datasets(role, permission_type)) # Deduplicate datasets with same ID unique = {} From f4117c42e9c1bd0630a333bb789faba8686ba5b0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 16:43:41 +0100 Subject: [PATCH 14/41] fix: Resolve issue with entity extraction test --- cognee/tests/tasks/entity_extraction/entity_extraction_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/tasks/entity_extraction/entity_extraction_test.py b/cognee/tests/tasks/entity_extraction/entity_extraction_test.py index 39e883e09..41a9254ca 100644 --- a/cognee/tests/tasks/entity_extraction/entity_extraction_test.py +++ b/cognee/tests/tasks/entity_extraction/entity_extraction_test.py @@ -55,7 +55,7 @@ async def main(): classified_data = ingestion.classify(file) # data_id is the hash of original file contents + owner id to avoid duplicate data - data_id = ingestion.identify(classified_data, await get_default_user()) + data_id = await ingestion.identify(classified_data, await get_default_user()) await cognee.add(file_path) From cd32b492a469c9bfac14d4b3f20ed99a727a9460 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 17:56:01 +0100 Subject: [PATCH 15/41] refactor: Add filtering of non current tenant results when authorizing dataset --- .../get_all_user_permission_datasets.py | 25 ++++++++++--------- .../users/roles/methods/add_user_to_role.py | 4 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index a8cb96fbb..ee1de3c72 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -24,18 +24,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # Get all tenants user is a part of tenants = await user.awaitable_attrs.tenants - for tenant in tenants: - # If tenant is the user's selected tenant add datasets that users roles in the tenant and the tenant itself - # have access for - if tenant.id == user.tenant_id: - # Get all datasets all tenant members have access to - datasets.extend(await get_principal_datasets(tenant, permission_type)) + # Get all datasets all tenant members have access to + datasets.extend(await get_principal_datasets(tenant, permission_type)) - # Get all datasets accessible by roles user is a part of - roles = await user.awaitable_attrs.roles - for role in roles: - datasets.extend(await get_principal_datasets(role, permission_type)) + # Get all datasets accessible by roles user is a part of + roles = await user.awaitable_attrs.roles + for role in roles: + datasets.extend(await get_principal_datasets(role, permission_type)) # Deduplicate datasets with same ID unique = {} @@ -43,5 +39,10 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. unique.setdefault(dataset.id, dataset) - # TODO: Add filtering out of datasets that aren't currently selected tenant of user (currently selected tenant is the tenant_id value in the User model) - return list(unique.values()) + # Filter out dataset that aren't part of the current user's tenant + filtered_datasets = [] + for dataset in list(unique.values()): + if dataset.tenant_id == user.tenant_id: + filtered_datasets.append(dataset) + + return filtered_datasets diff --git a/cognee/modules/users/roles/methods/add_user_to_role.py b/cognee/modules/users/roles/methods/add_user_to_role.py index de5e47775..d764ac900 100644 --- a/cognee/modules/users/roles/methods/add_user_to_role.py +++ b/cognee/modules/users/roles/methods/add_user_to_role.py @@ -42,11 +42,13 @@ async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID): .first() ) + user_tenants = await user.awaitable_attrs.tenants + if not user: raise UserNotFoundError elif not role: raise RoleNotFoundError - elif user.tenant_id != role.tenant_id: + elif role.tenant_id not in [tenant.id for tenant in user_tenants]: # TESTME raise TenantNotFoundError( message="User tenant does not match role tenant. User cannot be added to role." ) From fb102f29a8fbbfa941641208f41a55e1eb370fb5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 19:03:56 +0100 Subject: [PATCH 16/41] chore: Add alembic migration for multi-tenant system --- .../c946955da633_multi_tenant_support.py | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 alembic/versions/c946955da633_multi_tenant_support.py diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py new file mode 100644 index 000000000..2ad230974 --- /dev/null +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -0,0 +1,113 @@ +"""Multi Tenant Support + +Revision ID: c946955da633 +Revises: 211ab850ef3d +Create Date: 2025-11-04 18:11:09.325158 + +""" + +from typing import Sequence, Union +from datetime import datetime, timezone +from uuid import uuid4 + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision: str = "c946955da633" +down_revision: Union[str, None] = "211ab850ef3d" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _define_user_table() -> sa.Table: + table = sa.Table( + "users", + sa.MetaData(), + sa.Column( + "id", + sa.UUID, + sa.ForeignKey("principals.id", ondelete="CASCADE"), + primary_key=True, + nullable=False, + ), + sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), index=True, nullable=True), + ) + return table + + +def _define_dataset_table() -> sa.Table: + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + table = sa.Table( + "datasets", + sa.MetaData(), + sa.Column("id", sa.UUID, primary_key=True, default=uuid4), + sa.Column("name", sa.Text), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + default=lambda: datetime.now(timezone.utc), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + sa.Column("owner_id", sa.UUID(), sa.ForeignKey("principals.id"), index=True), + sa.Column("tenant_id", sa.UUID(), sa.ForeignKey("tenants.id"), index=True, nullable=True), + ) + + return table + + +def _get_column(inspector, table, name, schema=None): + for col in inspector.get_columns(table, schema=schema): + if col["name"] == name: + return col + return None + + +def upgrade() -> None: + conn = op.get_bind() + insp = sa.inspect(conn) + + dataset = _define_dataset_table() + user = _define_user_table() + + tenant_id_column = _get_column(insp, "datasets", "tenant_id") + if not tenant_id_column: + op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True)) + + # Build correlated subquery: select users.tenant_id for each dataset.owner_id + tenant_id_from_dataset_owner = ( + sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery() + ) + + # Update statement; restrict to rows where tenant_id is currently NULL + # update_stmt = ( + # sa.update(dataset) + # .values(tenant_id=subq) + # ) + + user = _define_user_table() + if op.get_context().dialect.name == "sqlite": + # If column doesn't exist create new original_extension column and update from values of extension column + with op.batch_alter_table("datasets") as batch_op: + batch_op.execute( + dataset.update().values( + tenant_id=tenant_id_from_dataset_owner, + ) + ) + else: + conn = op.get_bind() + conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner)) + + op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"], unique=False) + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + + op.drop_column("datasets", "tenant_id") + # ### end Alembic commands ### From db2a32dd171a7db53487bec4c29474d1f36d1aa2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 19:17:02 +0100 Subject: [PATCH 17/41] test: Resolve issue permission example --- alembic/versions/c946955da633_multi_tenant_support.py | 9 +-------- examples/python/permissions_example.py | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 2ad230974..09781c85c 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -79,18 +79,11 @@ def upgrade() -> None: if not tenant_id_column: op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True)) - # Build correlated subquery: select users.tenant_id for each dataset.owner_id + # Build subquery, select users.tenant_id for each dataset.owner_id tenant_id_from_dataset_owner = ( sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery() ) - # Update statement; restrict to rows where tenant_id is currently NULL - # update_stmt = ( - # sa.update(dataset) - # .values(tenant_id=subq) - # ) - - user = _define_user_table() if op.get_context().dialect.name == "sqlite": # If column doesn't exist create new original_extension column and update from values of extension column with op.batch_alter_table("datasets") as batch_op: diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 7c140845c..5d1195a11 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -151,7 +151,7 @@ async def main(): # To add a user to a role he must be part of the same tenant/organization print("\nOperation started as user_2 to add user_3 to CogneeLab tenant/organization") await add_user_to_tenant( - user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id, set_active_tenant=True + user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id, set_as_active_tenant=True ) print( From f002d3bf0ef24e8db113625971bfb98e6473e6b7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 20:24:16 +0100 Subject: [PATCH 18/41] refactor: Update permissions example --- .../tenants/methods/add_user_to_tenant.py | 2 +- .../users/tenants/methods/select_tenant.py | 6 ++- examples/python/permissions_example.py | 45 ++++++++++++++++--- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index edadfe66b..eecc49f6f 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -16,7 +16,7 @@ from cognee.modules.users.exceptions import ( async def add_user_to_tenant( - user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = True + user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = False ): """ Add a user with the given id to the tenant with the given id. diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index 6e72fea2f..b444e9b1e 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -7,11 +7,12 @@ from sqlalchemy import select from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models.UserTenant import UserTenant from cognee.modules.users.methods import get_user +from cognee.modules.users.models.User import User from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): +async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: """ Set the users active tenant to provided tenant. @@ -33,7 +34,7 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): user.tenant_id = None await session.merge(user) await session.commit() - return + return user tenant = await get_tenant(tenant_id) @@ -59,3 +60,4 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): user.tenant_id = tenant_id await session.merge(user) await session.commit() + return user diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 5d1195a11..fdbde00f0 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -3,6 +3,7 @@ import cognee import pathlib from cognee.modules.users.exceptions import PermissionDeniedError +from cognee.modules.users.tenants.methods import select_tenant from cognee.shared.logging_utils import get_logger from cognee.modules.search.types import SearchType from cognee.modules.users.methods import create_user @@ -116,6 +117,7 @@ async def main(): print( "\nOperation started as user_2 to give read permission to user_1 for the dataset owned by user_2" ) + await authorized_give_permission_on_datasets( user_1.id, [quantum_dataset_id], @@ -142,6 +144,9 @@ async def main(): print("User 2 is creating CogneeLab tenant/organization") tenant_id = await create_tenant("CogneeLab", user_2.id) + print("User 2 is selecting CogneeLab tenant/organization as active tenant") + await select_tenant(user_id=user_2.id, tenant_id=tenant_id) + print("\nUser 2 is creating Researcher role") role_id = await create_role(role_name="Researcher", owner_id=user_2.id) @@ -150,27 +155,55 @@ async def main(): # To add a user to a role he must be part of the same tenant/organization print("\nOperation started as user_2 to add user_3 to CogneeLab tenant/organization") - await add_user_to_tenant( - user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id, set_as_active_tenant=True - ) + await add_user_to_tenant(user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id) print( "\nOperation started by user_2, as tenant owner, to add user_3 to Researcher role inside the tenant/organization" ) await add_user_to_role(user_id=user_3.id, role_id=role_id, owner_id=user_2.id) + print("\nOperation as user_3 to select CogneeLab tenant/organization as active tenant") + await select_tenant(user_id=user_3.id, tenant_id=tenant_id) + print( - "\nOperation started as user_2 to give read permission to Researcher role for the dataset owned by user_2" + "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by user_2" + ) + # Even though the dataset owner is user_2, the dataset doesn't belong to the tenant/organization CogneeLab. + # So we can't assign permissions to it when we're acting in the CogneeLab tenant. + try: + await authorized_give_permission_on_datasets( + role_id, + [quantum_dataset_id], + "read", + user_2.id, + ) + except PermissionDeniedError: + print( + "User 2 could not give permission to the role as the QUANTUM dataset is not part of the CogneeLab tenant" + ) + + print( + "We will now create a new QUANTUM dataset in the CogneeLab tenant so that permissions can be assigned to the Researcher role inside the tenant/organization" + ) + # Re-create the QUANTUM dataset in the CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally + # and can still be accessed by selecting the personal tenant for user 2. + await cognee.add([text], dataset_name="QUANTUM", user=user_2) + quantum_cognify_result = await cognee.cognify(["QUANTUM"], user=user_2) + + # The recreated Quantum dataset will now have a different dataset_id as it's a new dataset in a different organization + quantum_dataset_id_cognee_lab_tenant = extract_dataset_id_from_cognify(quantum_cognify_result) + print( + "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by the CogneeLab tenant" ) await authorized_give_permission_on_datasets( role_id, - [quantum_dataset_id], + [quantum_dataset_id_cognee_lab_tenant], "read", user_2.id, ) # Now user_3 can read from QUANTUM dataset as part of the Researcher role after proper permissions have been assigned by the QUANTUM dataset owner, user_2. - print("\nSearch result as user_3 on the dataset owned by user_2:") + print("\nSearch result as user_3 on the QUANTUM dataset owned by the CogneeLab organization:") search_results = await cognee.search( query_type=SearchType.GRAPH_COMPLETION, query_text="What is in the document?", From 7782f246d30f159e23c4ae46afa7896936a8a677 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 20:54:00 +0100 Subject: [PATCH 19/41] refactor: Update permissions example to work with new changes --- .../routers/get_permissions_router.py | 2 +- .../users/tenants/methods/select_tenant.py | 9 +++----- examples/python/permissions_example.py | 22 ++++++++++--------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 20d35e748..db2c72705 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -259,7 +259,7 @@ def get_permissions_router() -> APIRouter: from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method - await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id) + await select_tenant_method(user=user, tenant_id=payload.tenant_id) return JSONResponse( status_code=200, diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index b444e9b1e..cb291d5f2 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -6,19 +6,18 @@ from sqlalchemy import select from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models.UserTenant import UserTenant -from cognee.modules.users.methods import get_user from cognee.modules.users.models.User import User from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: +async def select_tenant(user: User, tenant_id: Union[UUID, None]) -> User: """ Set the users active tenant to provided tenant. If None tenant_id is provided set current Tenant to the default single user-tenant Args: - user_id: Id of the user. + user: User object. tenant_id: Id of the tenant. Returns: @@ -27,8 +26,6 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: - user = await get_user(user_id) - if tenant_id is None: # If no tenant_id is provided set current Tenant to the single user-tenant user.tenant_id = None @@ -46,7 +43,7 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: # Check if User is part of Tenant result = await session.execute( select(UserTenant) - .where(UserTenant.user_id == user_id) + .where(UserTenant.user_id == user.id) .where(UserTenant.tenant_id == tenant_id) ) diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index fdbde00f0..4bbd30bea 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -145,7 +145,7 @@ async def main(): tenant_id = await create_tenant("CogneeLab", user_2.id) print("User 2 is selecting CogneeLab tenant/organization as active tenant") - await select_tenant(user_id=user_2.id, tenant_id=tenant_id) + await select_tenant(user=user_2, tenant_id=tenant_id) print("\nUser 2 is creating Researcher role") role_id = await create_role(role_name="Researcher", owner_id=user_2.id) @@ -163,7 +163,7 @@ async def main(): await add_user_to_role(user_id=user_3.id, role_id=role_id, owner_id=user_2.id) print("\nOperation as user_3 to select CogneeLab tenant/organization as active tenant") - await select_tenant(user_id=user_3.id, tenant_id=tenant_id) + await select_tenant(user=user_3, tenant_id=tenant_id) print( "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by user_2" @@ -183,21 +183,23 @@ async def main(): ) print( - "We will now create a new QUANTUM dataset in the CogneeLab tenant so that permissions can be assigned to the Researcher role inside the tenant/organization" + "We will now create a new QUANTUM dataset with the QUANTUM_COGNEE_LAB name in the CogneeLab tenant so that permissions can be assigned to the Researcher role inside the tenant/organization" ) - # Re-create the QUANTUM dataset in the CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally + # We can re-create the QUANTUM dataset in the CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally # and can still be accessed by selecting the personal tenant for user 2. - await cognee.add([text], dataset_name="QUANTUM", user=user_2) - quantum_cognify_result = await cognee.cognify(["QUANTUM"], user=user_2) + await cognee.add([text], dataset_name="QUANTUM_COGNEE_LAB", user=user_2) + quantum_cognee_lab_cognify_result = await cognee.cognify(["QUANTUM_COGNEE_LAB"], user=user_2) # The recreated Quantum dataset will now have a different dataset_id as it's a new dataset in a different organization - quantum_dataset_id_cognee_lab_tenant = extract_dataset_id_from_cognify(quantum_cognify_result) + quantum_cognee_lab_dataset_id = extract_dataset_id_from_cognify( + quantum_cognee_lab_cognify_result + ) print( "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by the CogneeLab tenant" ) await authorized_give_permission_on_datasets( role_id, - [quantum_dataset_id_cognee_lab_tenant], + [quantum_cognee_lab_dataset_id], "read", user_2.id, ) @@ -207,8 +209,8 @@ async def main(): search_results = await cognee.search( query_type=SearchType.GRAPH_COMPLETION, query_text="What is in the document?", - user=user_1, - dataset_ids=[quantum_dataset_id], + user=user_3, + dataset_ids=[quantum_cognee_lab_dataset_id], ) for result in search_results: print(f"{result}\n") From c2aaec2a827fbdd8f91747989753b4f62a41fa38 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 23:34:51 +0100 Subject: [PATCH 20/41] refactor: Resolve issue with permissions example --- .../api/v1/permissions/routers/get_permissions_router.py | 2 +- cognee/modules/users/tenants/methods/select_tenant.py | 6 ++++-- examples/python/permissions_example.py | 8 ++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index db2c72705..20d35e748 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -259,7 +259,7 @@ def get_permissions_router() -> APIRouter: from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method - await select_tenant_method(user=user, tenant_id=payload.tenant_id) + await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id) return JSONResponse( status_code=200, diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index cb291d5f2..83c11dc91 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -5,19 +5,20 @@ import sqlalchemy.exc from sqlalchemy import select from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.methods.get_user import get_user from cognee.modules.users.models.UserTenant import UserTenant from cognee.modules.users.models.User import User from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user: User, tenant_id: Union[UUID, None]) -> User: +async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: """ Set the users active tenant to provided tenant. If None tenant_id is provided set current Tenant to the default single user-tenant Args: - user: User object. + user_id: UUID of the user. tenant_id: Id of the tenant. Returns: @@ -26,6 +27,7 @@ async def select_tenant(user: User, tenant_id: Union[UUID, None]) -> User: """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: + user = await get_user(user_id) if tenant_id is None: # If no tenant_id is provided set current Tenant to the single user-tenant user.tenant_id = None diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 4bbd30bea..c0b104023 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -145,7 +145,7 @@ async def main(): tenant_id = await create_tenant("CogneeLab", user_2.id) print("User 2 is selecting CogneeLab tenant/organization as active tenant") - await select_tenant(user=user_2, tenant_id=tenant_id) + await select_tenant(user_id=user_2.id, tenant_id=tenant_id) print("\nUser 2 is creating Researcher role") role_id = await create_role(role_name="Researcher", owner_id=user_2.id) @@ -163,7 +163,7 @@ async def main(): await add_user_to_role(user_id=user_3.id, role_id=role_id, owner_id=user_2.id) print("\nOperation as user_3 to select CogneeLab tenant/organization as active tenant") - await select_tenant(user=user_3, tenant_id=tenant_id) + await select_tenant(user_id=user_3.id, tenant_id=tenant_id) print( "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by user_2" @@ -187,6 +187,10 @@ async def main(): ) # We can re-create the QUANTUM dataset in the CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally # and can still be accessed by selecting the personal tenant for user 2. + from cognee.modules.users.methods import get_user + + # Note: We need to update user_2 from the database to refresh its tenant context changes + user_2 = await get_user(user_2.id) await cognee.add([text], dataset_name="QUANTUM_COGNEE_LAB", user=user_2) quantum_cognee_lab_cognify_result = await cognee.cognify(["QUANTUM_COGNEE_LAB"], user=user_2) From 1643b13c95ba83b08abb0d1afeec80767049db26 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 12:43:01 +0100 Subject: [PATCH 21/41] chore: add table creation for multi-tenancy to migration --- .../c946955da633_multi_tenant_support.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 09781c85c..fc45644d0 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -75,6 +75,28 @@ def upgrade() -> None: dataset = _define_dataset_table() user = _define_user_table() + if "user_tenants" not in insp.get_table_names(): + tenant_id_from_user = sa.select(user.c.tenant_id).scalar_subquery() + # Define table with all necessary columns including primary key + user_tenants = op.create_table( + "user_tenants", + sa.Column("user_id", sa.UUID, sa.ForeignKey("users.id"), primary_key=True), + sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), primary_key=True), + sa.Column("created_at", sa.DateTime(), default=lambda: datetime.now(timezone.utc)), + ) + if op.get_context().dialect.name == "sqlite": + # If column doesn't exist create new original_extension column and update from values of extension column + with op.batch_alter_table("user_tenants") as batch_op: + batch_op.execute( + user_tenants.update().values( + tenant_id=tenant_id_from_user, + user_id=user.c.id, + ) + ) + else: + conn = op.get_bind() + conn.execute(dataset.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id)) + tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True)) From 9fc4199958045cb5ed06cfcaf783baae247760d6 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 13:18:47 +0100 Subject: [PATCH 22/41] fix: Resolve issue with cleaning acl table --- .../ab7e313804ae_permission_system_rework.py | 72 +++++++++++-------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py index bd69b9b41..d83f946a6 100644 --- a/alembic/versions/ab7e313804ae_permission_system_rework.py +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -144,44 +144,58 @@ def _create_data_permission(conn, user_id, data_id, permission_name): ) +def _get_column(inspector, table, name, schema=None): + for col in inspector.get_columns(table, schema=schema): + if col["name"] == name: + return col + return None + + def upgrade() -> None: conn = op.get_bind() + insp = sa.inspect(conn) - # Recreate ACLs table with default permissions set to datasets instead of documents - op.drop_table("acls") + dataset_id_column = _get_column(insp, "acls", "dataset_id") + if not dataset_id_column: + # Recreate ACLs table with default permissions set to datasets instead of documents + op.drop_table("acls") - acls_table = op.create_table( - "acls", - sa.Column("id", UUID, primary_key=True, default=uuid4), - sa.Column( - "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) - ), - sa.Column( - "updated_at", sa.DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc) - ), - sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), - sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), - sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), - ) + acls_table = op.create_table( + "acls", + sa.Column("id", UUID, primary_key=True, default=uuid4), + sa.Column( + "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), + sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), + sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), + ) - # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table - # definition or load what is in the database - dataset_table = _define_dataset_table() - datasets = conn.execute(sa.select(dataset_table)).fetchall() + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + dataset_table = _define_dataset_table() + datasets = conn.execute(sa.select(dataset_table)).fetchall() - if not datasets: - return + if not datasets: + return - acl_list = [] + acl_list = [] - for dataset in datasets: - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read")) - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "write")) - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share")) - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete")) + for dataset in datasets: + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read")) + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "write")) + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share")) + acl_list.append( + _create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete") + ) - if acl_list: - op.bulk_insert(acls_table, acl_list) + if acl_list: + op.bulk_insert(acls_table, acl_list) def downgrade() -> None: From fa4c50f972e27190fb97d20b2e61726b52bb3f2a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 16:05:33 +0100 Subject: [PATCH 23/41] fix: Resolve issue with sync migration not working for postgresql --- .../211ab850ef3d_add_sync_operations_table.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/alembic/versions/211ab850ef3d_add_sync_operations_table.py b/alembic/versions/211ab850ef3d_add_sync_operations_table.py index 370aab1a4..9c6e81f12 100644 --- a/alembic/versions/211ab850ef3d_add_sync_operations_table.py +++ b/alembic/versions/211ab850ef3d_add_sync_operations_table.py @@ -10,6 +10,7 @@ from typing import Sequence, Union from alembic import op import sqlalchemy as sa +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. @@ -27,6 +28,27 @@ def upgrade() -> None: inspector = sa.inspect(connection) if "sync_operations" not in inspector.get_table_names(): + if op.get_context().dialect.name == "postgresql": + syncstatus = postgresql.ENUM( + "STARTED", + "IN_PROGRESS", + "COMPLETED", + "FAILED", + "CANCELLED", + name="syncstatus", + create_type=False, + ) + else: + syncstatus = sa.Enum( + "STARTED", + "IN_PROGRESS", + "COMPLETED", + "FAILED", + "CANCELLED", + name="syncstatus", + create_type=False, + ) + # Table doesn't exist, create it normally op.create_table( "sync_operations", @@ -34,15 +56,7 @@ def upgrade() -> None: sa.Column("run_id", sa.Text(), nullable=True), sa.Column( "status", - sa.Enum( - "STARTED", - "IN_PROGRESS", - "COMPLETED", - "FAILED", - "CANCELLED", - name="syncstatus", - create_type=False, - ), + syncstatus, nullable=True, ), sa.Column("progress_percentage", sa.Integer(), nullable=True), From c4807a0c6751e05a4fb04439afa183fa7620c8f5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 16:14:37 +0100 Subject: [PATCH 24/41] refactor: Use user_tenants table to update --- alembic/versions/c946955da633_multi_tenant_support.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index fc45644d0..3f7bde5a2 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -95,7 +95,9 @@ def upgrade() -> None: ) else: conn = op.get_bind() - conn.execute(dataset.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id)) + conn.execute( + user_tenants.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id) + ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: From 9b6cbaf389b172fb86da42ac1c9c8fe544202aae Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 17:24:11 +0100 Subject: [PATCH 25/41] chore: Add multi tenant migration --- .../211ab850ef3d_add_sync_operations_table.py | 6 ++- .../c946955da633_multi_tenant_support.py | 38 +++++++++++-------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/alembic/versions/211ab850ef3d_add_sync_operations_table.py b/alembic/versions/211ab850ef3d_add_sync_operations_table.py index 9c6e81f12..976439a32 100644 --- a/alembic/versions/211ab850ef3d_add_sync_operations_table.py +++ b/alembic/versions/211ab850ef3d_add_sync_operations_table.py @@ -36,7 +36,8 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=False, + create_type=True, + checkfirst=True, ) else: syncstatus = sa.Enum( @@ -46,7 +47,8 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=False, + create_type=True, + checkfirst=True, ) # Table doesn't exist, create it normally diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 3f7bde5a2..6d21f8fc7 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -20,6 +20,10 @@ branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None +def _now(): + return datetime.now(timezone.utc) + + def _define_user_table() -> sa.Table: table = sa.Table( "users", @@ -76,27 +80,29 @@ def upgrade() -> None: user = _define_user_table() if "user_tenants" not in insp.get_table_names(): - tenant_id_from_user = sa.select(user.c.tenant_id).scalar_subquery() # Define table with all necessary columns including primary key user_tenants = op.create_table( "user_tenants", sa.Column("user_id", sa.UUID, sa.ForeignKey("users.id"), primary_key=True), sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), primary_key=True), - sa.Column("created_at", sa.DateTime(), default=lambda: datetime.now(timezone.utc)), + sa.Column( + "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) + ), ) - if op.get_context().dialect.name == "sqlite": - # If column doesn't exist create new original_extension column and update from values of extension column - with op.batch_alter_table("user_tenants") as batch_op: - batch_op.execute( - user_tenants.update().values( - tenant_id=tenant_id_from_user, - user_id=user.c.id, - ) - ) - else: - conn = op.get_bind() - conn.execute( - user_tenants.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id) + + # Get all users with their tenant_id + user_data = conn.execute( + sa.select(user.c.id, user.c.tenant_id).where(user.c.tenant_id.isnot(None)) + ).fetchall() + + # Insert into user_tenants table + if user_data: + op.bulk_insert( + user_tenants, + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ], ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") @@ -125,6 +131,6 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - + op.drop_table("user_tenants") op.drop_column("datasets", "tenant_id") # ### end Alembic commands ### From 1ef5805c5708ae82eed17335e891eddafd794cf4 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 17:50:13 +0100 Subject: [PATCH 26/41] fix: Resolve issue with sync migration --- .../211ab850ef3d_add_sync_operations_table.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/alembic/versions/211ab850ef3d_add_sync_operations_table.py b/alembic/versions/211ab850ef3d_add_sync_operations_table.py index 976439a32..30049b44b 100644 --- a/alembic/versions/211ab850ef3d_add_sync_operations_table.py +++ b/alembic/versions/211ab850ef3d_add_sync_operations_table.py @@ -27,6 +27,12 @@ def upgrade() -> None: connection = op.get_bind() inspector = sa.inspect(connection) + if op.get_context().dialect.name == "postgresql": + syncstatus_enum = postgresql.ENUM( + "STARTED", "IN_PROGRESS", "COMPLETED", "FAILED", "CANCELLED", name="syncstatus" + ) + syncstatus_enum.create(op.get_bind(), checkfirst=True) + if "sync_operations" not in inspector.get_table_names(): if op.get_context().dialect.name == "postgresql": syncstatus = postgresql.ENUM( @@ -36,8 +42,7 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=True, - checkfirst=True, + create_type=False, ) else: syncstatus = sa.Enum( @@ -47,8 +52,7 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=True, - checkfirst=True, + create_type=False, ) # Table doesn't exist, create it normally From ce64f242b7a5480bbe9763bbb523fafb7b10e9fb Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 18:04:05 +0100 Subject: [PATCH 27/41] refactor: add droping of index as well --- alembic/versions/c946955da633_multi_tenant_support.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 6d21f8fc7..ba451fc03 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -126,11 +126,10 @@ def upgrade() -> None: conn = op.get_bind() conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner)) - op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"], unique=False) - def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_table("user_tenants") + op.drop_index(op.f("ix_datasets_tenant_id"), table_name="datasets") op.drop_column("datasets", "tenant_id") + op.drop_table("user_tenants") # ### end Alembic commands ### From ac6dd08855e30349b0666e6af48da3e829079948 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 14:35:26 +0100 Subject: [PATCH 28/41] fix: Resolve issue with sqlite index creation --- alembic/versions/c946955da633_multi_tenant_support.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index ba451fc03..c87500907 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -126,6 +126,8 @@ def upgrade() -> None: conn = op.get_bind() conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner)) + op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"]) + def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### From ac751bacf09e26b851b5829d46330a2f7ee7f25e Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 14:51:25 +0100 Subject: [PATCH 29/41] fix: Resolve SQLite migration issue --- .../c946955da633_multi_tenant_support.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index c87500907..a87989d9b 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -97,13 +97,23 @@ def upgrade() -> None: # Insert into user_tenants table if user_data: - op.bulk_insert( - user_tenants, - [ - {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} - for user_id, tenant_id in user_data - ], - ) + if op.get_context().dialect.name == "sqlite": + insert_stmt = user_tenants.insert().values( + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ] + ) + conn.execute(insert_stmt) + conn.commit() + else: + op.bulk_insert( + user_tenants, + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ], + ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: From ef3a3826698d89cfdf5bed62b6ea9f93576122d8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:23:54 +0100 Subject: [PATCH 30/41] refactor: use batch insert for SQLite as well --- .../c946955da633_multi_tenant_support.py | 26 ++++++------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index a87989d9b..d8fccdfbf 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -97,23 +97,13 @@ def upgrade() -> None: # Insert into user_tenants table if user_data: - if op.get_context().dialect.name == "sqlite": - insert_stmt = user_tenants.insert().values( - [ - {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} - for user_id, tenant_id in user_data - ] - ) - conn.execute(insert_stmt) - conn.commit() - else: - op.bulk_insert( - user_tenants, - [ - {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} - for user_id, tenant_id in user_data - ], - ) + op.bulk_insert( + user_tenants, + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ], + ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: @@ -141,7 +131,7 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("user_tenants") op.drop_index(op.f("ix_datasets_tenant_id"), table_name="datasets") op.drop_column("datasets", "tenant_id") - op.drop_table("user_tenants") # ### end Alembic commands ### From c146de3a4d2f5327f4cffd347a8a782e39906da0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:41:00 +0100 Subject: [PATCH 31/41] fix: Remove creation of database and db tables from env.py --- alembic/env.py | 5 ----- alembic/versions/c946955da633_multi_tenant_support.py | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index 1cbef65f7..8ca09968d 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -87,11 +87,6 @@ db_engine = get_relational_engine() print("Using database:", db_engine.db_uri) -if "sqlite" in db_engine.db_uri: - from cognee.infrastructure.utils.run_sync import run_sync - - run_sync(db_engine.create_database()) - config.set_section_option( config.config_ini_section, "SQLALCHEMY_DATABASE_URI", diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index d8fccdfbf..7806fdde8 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -79,6 +79,10 @@ def upgrade() -> None: dataset = _define_dataset_table() user = _define_user_table() + print(insp.get_table_names()) + + print(_get_column(insp, "user_tenants", "tenant_id")) + if "user_tenants" not in insp.get_table_names(): # Define table with all necessary columns including primary key user_tenants = op.create_table( From efb46c99f9d0d5ac426540b95aadd0a1bfd3e5de Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:47:42 +0100 Subject: [PATCH 32/41] fix: resolve issue with sqlite migration --- alembic/versions/c946955da633_multi_tenant_support.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 7806fdde8..d8fccdfbf 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -79,10 +79,6 @@ def upgrade() -> None: dataset = _define_dataset_table() user = _define_user_table() - print(insp.get_table_names()) - - print(_get_column(insp, "user_tenants", "tenant_id")) - if "user_tenants" not in insp.get_table_names(): # Define table with all necessary columns including primary key user_tenants = op.create_table( From 0d68175167af5da9b69e4024f434cbf0bd64b2ae Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:53:22 +0100 Subject: [PATCH 33/41] fix: remove database creation from migrations --- alembic/versions/8057ae7329c2_initial_migration.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/alembic/versions/8057ae7329c2_initial_migration.py b/alembic/versions/8057ae7329c2_initial_migration.py index aa0ecd4b8..42e9904a8 100644 --- a/alembic/versions/8057ae7329c2_initial_migration.py +++ b/alembic/versions/8057ae7329c2_initial_migration.py @@ -18,11 +18,8 @@ depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: - db_engine = get_relational_engine() - # we might want to delete this - await_only(db_engine.create_database()) + pass def downgrade() -> None: - db_engine = get_relational_engine() - await_only(db_engine.delete_database()) + pass From bcc59cf9a0b6f5f765269a7ea2725fbd27e971f5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:57:59 +0100 Subject: [PATCH 34/41] fix: Remove default user creation --- alembic/versions/482cd6517ce4_add_default_user.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/alembic/versions/482cd6517ce4_add_default_user.py b/alembic/versions/482cd6517ce4_add_default_user.py index d85f0f146..fafa111f9 100644 --- a/alembic/versions/482cd6517ce4_add_default_user.py +++ b/alembic/versions/482cd6517ce4_add_default_user.py @@ -23,11 +23,8 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2" def upgrade() -> None: - try: - await_only(create_default_user()) - except UserAlreadyExists: - pass # It's fine if the default user already exists + pass # It's fine if the default user already exists def downgrade() -> None: - await_only(delete_user("default_user@example.com")) + pass From 61e1c2903f5f7372e5281a3c7126cc2bcb71bde5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 17:00:46 +0100 Subject: [PATCH 35/41] fix: Remove issue with default user creation --- alembic/versions/482cd6517ce4_add_default_user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alembic/versions/482cd6517ce4_add_default_user.py b/alembic/versions/482cd6517ce4_add_default_user.py index fafa111f9..c8a3dc5d5 100644 --- a/alembic/versions/482cd6517ce4_add_default_user.py +++ b/alembic/versions/482cd6517ce4_add_default_user.py @@ -23,7 +23,7 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2" def upgrade() -> None: - pass # It's fine if the default user already exists + pass def downgrade() -> None: From 7dec6bfdedf30149113a25aa66bf6c21980b605b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 18:10:04 +0100 Subject: [PATCH 36/41] refactor: Add migrations as part of python package --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5f0aef1d8..8af35113c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -167,7 +167,6 @@ exclude = [ "/dist", "/.data", "/.github", - "/alembic", "/deployment", "/cognee-mcp", "/cognee-frontend", From 96c8bba5807e13cf376802da28817788ae4d6dbd Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 19:12:09 +0100 Subject: [PATCH 37/41] refactor: Add db creation as step in MCP creation --- cognee-mcp/src/server.py | 4 ++++ cognee/modules/data/models/Dataset.py | 1 + 2 files changed, 5 insertions(+) diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index ce6dad88a..7c708638c 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -1096,6 +1096,10 @@ async def main(): # Skip migrations when in API mode (the API server handles its own database) if not args.no_migration and not args.api_url: + from cognee.modules.engine.operations.setup import setup + + await setup() + # Run Alembic migrations from the main cognee directory where alembic.ini is located logger.info("Running database migrations...") migration_result = subprocess.run( diff --git a/cognee/modules/data/models/Dataset.py b/cognee/modules/data/models/Dataset.py index 00ed4da96..fba065253 100644 --- a/cognee/modules/data/models/Dataset.py +++ b/cognee/modules/data/models/Dataset.py @@ -37,5 +37,6 @@ class Dataset(Base): "createdAt": self.created_at.isoformat(), "updatedAt": self.updated_at.isoformat() if self.updated_at else None, "ownerId": str(self.owner_id), + "tenantId": str(self.tenant_id), "data": [data.to_json() for data in self.data], } From 59f758d5c227b04f91e8086915fde078be3089db Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 7 Nov 2025 15:50:49 +0100 Subject: [PATCH 38/41] feat: Add test for multi tenancy, add ability to share name for dataset across tenants for one user --- .github/workflows/e2e_tests.yml | 29 ++- cognee/modules/data/methods/create_dataset.py | 1 + .../modules/data/methods/get_dataset_ids.py | 6 +- cognee/modules/search/methods/search.py | 2 + cognee/tests/test_multi_tenancy.py | 165 ++++++++++++++++++ 5 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 cognee/tests/test_multi_tenancy.py diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 0596f22d3..715487372 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -226,7 +226,7 @@ jobs: - name: Dependencies already installed run: echo "Dependencies already installed in setup" - - name: Run parallel databases test + - name: Run permissions test env: ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} @@ -239,6 +239,31 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_permissions.py + test-multi-tenancy: + name: Test multi tenancy with different situations in Cognee + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run multi tenancy test + env: + ENV: 'dev' + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./cognee/tests/test_multi_tenancy.py + test-graph-edges: name: Test graph edge ingestion runs-on: ubuntu-22.04 @@ -487,4 +512,4 @@ jobs: AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }} - run: uv run python ./cognee/tests/test_load.py \ No newline at end of file + run: uv run python ./cognee/tests/test_load.py diff --git a/cognee/modules/data/methods/create_dataset.py b/cognee/modules/data/methods/create_dataset.py index 280c9e105..7e28a8255 100644 --- a/cognee/modules/data/methods/create_dataset.py +++ b/cognee/modules/data/methods/create_dataset.py @@ -16,6 +16,7 @@ async def create_dataset(dataset_name: str, user: User, session: AsyncSession) - .options(joinedload(Dataset.data)) .filter(Dataset.name == dataset_name) .filter(Dataset.owner_id == owner_id) + .filter(Dataset.tenant_id == user.tenant_id) ) ).first() diff --git a/cognee/modules/data/methods/get_dataset_ids.py b/cognee/modules/data/methods/get_dataset_ids.py index d4402ff36..a61e85310 100644 --- a/cognee/modules/data/methods/get_dataset_ids.py +++ b/cognee/modules/data/methods/get_dataset_ids.py @@ -27,7 +27,11 @@ async def get_dataset_ids(datasets: Union[list[str], list[UUID]], user): # Get all user owned dataset objects (If a user wants to write to a dataset he is not the owner of it must be provided through UUID.) user_datasets = await get_datasets(user.id) # Filter out non name mentioned datasets - dataset_ids = [dataset.id for dataset in user_datasets if dataset.name in datasets] + dataset_ids = [dataset for dataset in user_datasets if dataset.name in datasets] + # Filter out non current tenant datasets + dataset_ids = [ + dataset.id for dataset in dataset_ids if dataset.tenant_id == user.tenant_id + ] else: raise DatasetTypeError( f"One or more of the provided dataset types is not handled: f{datasets}" diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 5e465b239..b4278424b 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -172,6 +172,7 @@ async def search( "search_result": [context] if context else None, "dataset_id": datasets[0].id, "dataset_name": datasets[0].name, + "dataset_tenant_id": datasets[0].tenant_id, "graphs": graphs, } ) @@ -181,6 +182,7 @@ async def search( "search_result": [result] if result else None, "dataset_id": datasets[0].id, "dataset_name": datasets[0].name, + "dataset_tenant_id": datasets[0].tenant_id, "graphs": graphs, } ) diff --git a/cognee/tests/test_multi_tenancy.py b/cognee/tests/test_multi_tenancy.py new file mode 100644 index 000000000..7cdcda8d8 --- /dev/null +++ b/cognee/tests/test_multi_tenancy.py @@ -0,0 +1,165 @@ +import cognee +import pytest + +from cognee.modules.users.exceptions import PermissionDeniedError +from cognee.modules.users.tenants.methods import select_tenant +from cognee.modules.users.methods import get_user +from cognee.shared.logging_utils import get_logger +from cognee.modules.search.types import SearchType +from cognee.modules.users.methods import create_user +from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets +from cognee.modules.users.roles.methods import add_user_to_role +from cognee.modules.users.roles.methods import create_role +from cognee.modules.users.tenants.methods import create_tenant +from cognee.modules.users.tenants.methods import add_user_to_tenant +from cognee.modules.engine.operations.setup import setup +from cognee.shared.logging_utils import setup_logging, CRITICAL + +logger = get_logger() + + +async def main(): + # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") + + # Set up the necessary databases and tables for user management. + await setup() + + # Add document for user_1, add it under dataset name AI + text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. + At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages + this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the + preparation and manipulation of quantum state""" + + print("Creating user_1: user_1@example.com") + user_1 = await create_user("user_1@example.com", "example") + await cognee.add([text], dataset_name="AI", user=user_1) + + print("\nCreating user_2: user_2@example.com") + user_2 = await create_user("user_2@example.com", "example") + + # Run cognify for both datasets as the appropriate user/owner + print("\nCreating different datasets for user_1 (AI dataset) and user_2 (QUANTUM dataset)") + ai_cognify_result = await cognee.cognify(["AI"], user=user_1) + + # Extract dataset_ids from cognify results + def extract_dataset_id_from_cognify(cognify_result): + """Extract dataset_id from cognify output dictionary""" + for dataset_id, pipeline_result in cognify_result.items(): + return dataset_id # Return the first dataset_id + return None + + # Get dataset IDs from cognify results + # Note: When we want to work with datasets from other users (search, add, cognify and etc.) we must supply dataset + # information through dataset_id using dataset name only looks for datasets owned by current user + ai_dataset_id = extract_dataset_id_from_cognify(ai_cognify_result) + + # We can see here that user_1 can read his own dataset (AI dataset) + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_1, + datasets=[ai_dataset_id], + ) + + # Verify that user_2 cannot access user_1's dataset without permission + with pytest.raises(PermissionDeniedError): + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_2, + datasets=[ai_dataset_id], + ) + + # Create new tenant and role, add user_2 to tenant and role + tenant_id = await create_tenant("CogneeLab", user_1.id) + await select_tenant(user_id=user_1.id, tenant_id=tenant_id) + role_id = await create_role(role_name="Researcher", owner_id=user_1.id) + await add_user_to_tenant( + user_id=user_2.id, tenant_id=tenant_id, owner_id=user_1.id, set_as_active_tenant=True + ) + await add_user_to_role(user_id=user_2.id, role_id=role_id, owner_id=user_1.id) + + # Assert that user_1 cannot give permissions on his dataset to role before switching to the correct tenant + # AI dataset was made with default tenant and not CogneeLab tenant + with pytest.raises(PermissionDeniedError): + await authorized_give_permission_on_datasets( + role_id, + [ai_dataset_id], + "read", + user_1.id, + ) + + # We need to refresh the user object with changes made when switching tenants + user_1 = await get_user(user_1.id) + await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1) + ai_cognee_lab_cognify_result = await cognee.cognify(["AI_COGNEE_LAB"], user=user_1) + + ai_cognee_lab_dataset_id = extract_dataset_id_from_cognify(ai_cognee_lab_cognify_result) + + await authorized_give_permission_on_datasets( + role_id, + [ai_cognee_lab_dataset_id], + "read", + user_1.id, + ) + + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_2, + dataset_ids=[ai_cognee_lab_dataset_id], + ) + for result in search_results: + print(f"{result}\n") + + # Let's test changing tenants + tenant_id = await create_tenant("CogneeLab2", user_1.id) + await select_tenant(user_id=user_1.id, tenant_id=tenant_id) + + user_1 = await get_user(user_1.id) + await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1) + await cognee.cognify(["AI_COGNEE_LAB"], user=user_1) + + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_1, + ) + + # Assert only AI_COGNEE_LAB dataset from CogneeLab2 tenant is visible as the currently selected tenant + assert len(search_results) == 1, ( + f"Search results must only contain one dataset from current tenant: {search_results}" + ) + assert search_results[0]["dataset_name"] == "AI_COGNEE_LAB", ( + f"Dict must contain dataset name 'AI_COGNEE_LAB': {search_results[0]}" + ) + assert search_results[0]["dataset_tenant_id"] == user_1.tenant_id, ( + f"Dataset tenant_id must be same as user_1 tenant_id: {search_results[0]}" + ) + + # Switch back to no tenant (default tenant) + await select_tenant(user_id=user_1.id, tenant_id=None) + # Refresh user_1 object + user_1 = await get_user(user_1.id) + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_1, + ) + assert len(search_results) == 1, ( + f"Search results must only contain one dataset from default tenant: {search_results}" + ) + assert search_results[0]["dataset_name"] == "AI", ( + f"Dict must contain dataset name 'AI': {search_results[0]}" + ) + + +if __name__ == "__main__": + import asyncio + + logger = setup_logging(log_level=CRITICAL) + asyncio.run(main()) From d6e2bd132b85d9e038475ec4adf87140f69e53ce Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 7 Nov 2025 16:37:37 +0100 Subject: [PATCH 39/41] refactor: Remove testme comment --- cognee/modules/users/roles/methods/add_user_to_role.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/users/roles/methods/add_user_to_role.py b/cognee/modules/users/roles/methods/add_user_to_role.py index d764ac900..23bb947f0 100644 --- a/cognee/modules/users/roles/methods/add_user_to_role.py +++ b/cognee/modules/users/roles/methods/add_user_to_role.py @@ -48,7 +48,7 @@ async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID): raise UserNotFoundError elif not role: raise RoleNotFoundError - elif role.tenant_id not in [tenant.id for tenant in user_tenants]: # TESTME + elif role.tenant_id not in [tenant.id for tenant in user_tenants]: raise TenantNotFoundError( message="User tenant does not match role tenant. User cannot be added to role." ) From 3710eec94ff547fb9fb80f3b3b35223098269ffc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 10 Nov 2025 16:23:34 +0100 Subject: [PATCH 40/41] refactor: update docstring message --- cognee/api/v1/permissions/routers/get_permissions_router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 20d35e748..63de97eaa 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -246,7 +246,7 @@ def get_permissions_router() -> APIRouter: - **tenant_id** (Union[UUID, None]): UUID of the tenant to select, If null/None is provided use the default single user tenant ## Response - Returns a success message indicating the tenant was created. + Returns a success message along with selected tenant id. """ send_telemetry( "Permissions API Endpoint Invoked", From b5f94c889d00e4043f9f7373b449d4dd165e2391 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Tue, 11 Nov 2025 12:51:09 +0100 Subject: [PATCH 41/41] Update cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py Co-authored-by: Boris --- .../permissions/methods/get_all_user_permission_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index ee1de3c72..5eed992db 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -39,7 +39,7 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. unique.setdefault(dataset.id, dataset) - # Filter out dataset that aren't part of the current user's tenant + # Filter out dataset that aren't part of the selected user's tenant filtered_datasets = [] for dataset in list(unique.values()): if dataset.tenant_id == user.tenant_id: