From 40846df92db48312484721e163a650f3bb024127 Mon Sep 17 00:00:00 2001 From: rajeevrajeshuni Date: Sun, 30 Nov 2025 07:26:24 +0530 Subject: [PATCH] cleanup of delete dataset methods --- cognee/cli/commands/delete_command.py | 2 +- cognee/cli/tui/delete_screen.py | 2 +- .../sqlalchemy/SqlAlchemyAdapter.py | 53 ++++++++++------- cognee/modules/data/methods/__init__.py | 2 +- .../data/methods/delete_data_by_user.py | 49 +++------------- cognee/modules/data/methods/delete_dataset.py | 2 +- .../data/methods/delete_dataset_by_name.py | 30 ++++++++++ .../data/methods/delete_datasets_by_name.py | 57 ------------------- 8 files changed, 72 insertions(+), 125 deletions(-) create mode 100644 cognee/modules/data/methods/delete_dataset_by_name.py delete mode 100644 cognee/modules/data/methods/delete_datasets_by_name.py diff --git a/cognee/cli/commands/delete_command.py b/cognee/cli/commands/delete_command.py index 9079b4ca3..d50571fca 100644 --- a/cognee/cli/commands/delete_command.py +++ b/cognee/cli/commands/delete_command.py @@ -6,7 +6,7 @@ from cognee.cli import DEFAULT_DOCS_URL import cognee.cli.echo as fmt from cognee.cli.exceptions import CliCommandException, CliCommandInnerException from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts -from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name +from cognee.modules.data.methods.delete_dataset_by_name import delete_datasets_by_name from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user from cognee.modules.users.methods import get_default_user diff --git a/cognee/cli/tui/delete_screen.py b/cognee/cli/tui/delete_screen.py index bb08a3a5f..80bb9586d 100644 --- a/cognee/cli/tui/delete_screen.py +++ b/cognee/cli/tui/delete_screen.py @@ -6,7 +6,7 @@ from textual.containers import Container, Vertical, Horizontal from textual.binding import Binding from cognee.cli.tui.base_screen import BaseTUIScreen from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts -from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name +from cognee.modules.data.methods.delete_dataset_by_name import delete_datasets_by_name from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user from cognee.modules.users.methods import get_default_user diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 380ce9917..c02c486a4 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -3,8 +3,7 @@ import asyncio from os import path import tempfile from uuid import UUID -from typing import Optional -from typing import AsyncGenerator, List +from typing import Optional, AsyncGenerator, List, Union from contextlib import asynccontextmanager from sqlalchemy.orm import joinedload from sqlalchemy.exc import NoResultFound @@ -235,35 +234,45 @@ class SQLAlchemyAdapter: return [schema[0] for schema in result.fetchall()] return [] - async def delete_entity_by_id( - self, table_name: str, data_id: UUID, schema_name: Optional[str] = "public" + async def delete_entities_by_id( + self, + table_name: str, + data_id: Union[UUID, List[UUID]], # Supports a single UUID or a List of UUIDs + schema_name: Optional[str] = "public" ): """ - Delete an entity from the specified table based on its unique ID. + Delete one or more entities from the specified table based on their ID(s). Parameters: ----------- - - - table_name (str): The name of the table from which to delete the entity. - - data_id (UUID): The unique identifier of the entity to be deleted. - - schema_name (Optional[str]): The name of the schema where the table resides, - defaults to 'public'. (default 'public') + - table_name (str): The name of the table from which to delete the entities. + - data_id (Union[UUID, List[UUID]]): The unique identifier(s) to be deleted. + - schema_name (Optional[str]): The name of the schema where the table resides. """ - if self.engine.dialect.name == "sqlite": - async with self.get_async_session() as session: - TableModel = await self.get_table(table_name, schema_name) - # Foreign key constraints are disabled by default in SQLite (for backwards compatibility), - # so must be enabled for each database connection/session separately. + # Ensure data_ids is a list for the WHERE clause logic + if isinstance(data_id, list): + data_ids_to_delete = data_id + else: + data_ids_to_delete = [data_id] + + if not data_ids_to_delete: + return + + async with self.get_async_session() as session: + TableModel = await self.get_table(table_name, schema_name) + + # Handle SQLite's foreign key requirement + if self.engine.dialect.name == "sqlite": + from sqlalchemy import text await session.execute(text("PRAGMA foreign_keys = ON;")) - await session.execute(TableModel.delete().where(TableModel.c.id == data_id)) - await session.commit() - else: - async with self.get_async_session() as session: - TableModel = await self.get_table(table_name, schema_name) - await session.execute(TableModel.delete().where(TableModel.c.id == data_id)) - await session.commit() + # Construct the DELETE statement using the 'in_()' operator + stmt = TableModel.delete().where(TableModel.c.id.in_(data_ids_to_delete)) + + # Execute and commit + await session.execute(stmt) + await session.commit() async def delete_data_entity(self, data_id: UUID): """ diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py index 34b58590d..17073cd08 100644 --- a/cognee/modules/data/methods/__init__.py +++ b/cognee/modules/data/methods/__init__.py @@ -16,7 +16,7 @@ from .get_dataset_ids import get_dataset_ids # Delete from .delete_dataset import delete_dataset -from .delete_datasets_by_name import delete_datasets_by_name +from .delete_dataset_by_name import delete_datasets_by_name from .delete_data_by_user import delete_data_by_user from .delete_data import delete_data diff --git a/cognee/modules/data/methods/delete_data_by_user.py b/cognee/modules/data/methods/delete_data_by_user.py index 258321b78..0f94df852 100644 --- a/cognee/modules/data/methods/delete_data_by_user.py +++ b/cognee/modules/data/methods/delete_data_by_user.py @@ -1,14 +1,14 @@ from uuid import UUID -from sqlalchemy import select, delete as sql_delete +from sqlalchemy import select from cognee.infrastructure.databases.relational import get_relational_engine -from cognee.modules.data.models import Dataset, DatasetData +from cognee.modules.data.models import Dataset from cognee.modules.users.methods import get_user from cognee.shared.logging_utils import get_logger logger = get_logger() -async def delete_data_by_user(user_id: UUID) -> dict[str, int]: +async def delete_data_by_user(user_id: UUID): """ Delete all datasets and their associated data for a specific user. @@ -18,11 +18,6 @@ async def delete_data_by_user(user_id: UUID) -> dict[str, int]: Args: user_id: UUID of the user whose data should be deleted - Returns: - Dictionary containing deletion statistics: - - datasets_deleted: Number of datasets deleted - - data_entries_deleted: Number of data entries deleted - Raises: ValueError: If user is not found """ @@ -35,37 +30,7 @@ async def delete_data_by_user(user_id: UUID) -> dict[str, int]: raise ValueError(f"User with ID {user_id} not found") # Get all datasets owned by this user - datasets_query = select(Dataset).where(Dataset.owner_id == user_id) - user_datasets = (await session.execute(datasets_query)).scalars().all() - - datasets_deleted = 0 - data_entries_deleted = 0 - - # Delete each dataset and its data - for dataset in user_datasets: - # Get all data entries in this dataset - data_query = select(DatasetData).where(DatasetData.dataset_id == dataset.id) - dataset_data_links = (await session.execute(data_query)).scalars().all() - - # Delete dataset-data links - for link in dataset_data_links: - await session.execute( - sql_delete(DatasetData).where(DatasetData.id == link.id) - ) - data_entries_deleted += 1 - - # Delete the dataset itself - await session.execute( - sql_delete(Dataset).where(Dataset.id == dataset.id) - ) - datasets_deleted += 1 - - # Commit all changes - await session.commit() - - logger.info(f"Deleted {datasets_deleted} datasets and {data_entries_deleted} data entries for user {user_id}") - - return { - "datasets_deleted": datasets_deleted, - "data_entries_deleted": data_entries_deleted, - } \ No newline at end of file + datasets_query = select(Dataset.id).where(Dataset.owner_id == user_id) + user_datasets_ids = (await session.execute(datasets_query)).scalars().all() + if user_datasets_ids: + await db_engine.delete_entities_by_id(Dataset.__table__.name, user_datasets_ids) \ No newline at end of file diff --git a/cognee/modules/data/methods/delete_dataset.py b/cognee/modules/data/methods/delete_dataset.py index ff20ff9e7..7cc6a2fd0 100644 --- a/cognee/modules/data/methods/delete_dataset.py +++ b/cognee/modules/data/methods/delete_dataset.py @@ -5,4 +5,4 @@ from cognee.infrastructure.databases.relational import get_relational_engine async def delete_dataset(dataset: Dataset): db_engine = get_relational_engine() - return await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id) + return await db_engine.delete_entities_by_id(dataset.__tablename__, dataset.id) diff --git a/cognee/modules/data/methods/delete_dataset_by_name.py b/cognee/modules/data/methods/delete_dataset_by_name.py new file mode 100644 index 000000000..ae8f7704f --- /dev/null +++ b/cognee/modules/data/methods/delete_dataset_by_name.py @@ -0,0 +1,30 @@ +from uuid import UUID +from sqlalchemy import select +from cognee.infrastructure.databases.relational import get_relational_engine +from ..models import Dataset + + +async def delete_dataset_by_name( + dataset_name: str, user_id: UUID +): + """ + Delete a single dataset by name for a specific user. + + Args: + dataset_name: The name of the dataset to delete (must be a single string). + user_id: UUID of the dataset owner. + + """ + db_engine = get_relational_engine() + + async with db_engine.get_async_session() as session: + dataset_id = ( + await session.scalars( + select(Dataset.id) + .filter(Dataset.owner_id == user_id) + .filter(Dataset.name == dataset_name) + ) + ).first() + #Keeping this out of the first session, since delete_entities_by_id creates another session. + if dataset_id: + await db_engine.delete_entities_by_id(Dataset.__table__.name, dataset_id) diff --git a/cognee/modules/data/methods/delete_datasets_by_name.py b/cognee/modules/data/methods/delete_datasets_by_name.py deleted file mode 100644 index abccd8f7c..000000000 --- a/cognee/modules/data/methods/delete_datasets_by_name.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import Union -from uuid import UUID -from sqlalchemy import select -from cognee.infrastructure.databases.relational import get_relational_engine -from ..models import Dataset - - -async def delete_datasets_by_name( - dataset_names: Union[str, list[str]], user_id: UUID -) -> dict[str, any]: - """ - Delete datasets by name for a specific user. - - Args: - dataset_names: Single dataset name or list of dataset names to delete - user_id: UUID of the dataset owner - - Returns: - Dictionary containing: - - deleted_count: Number of datasets deleted - - deleted_ids: List of deleted dataset IDs - - not_found: List of dataset names that were not found - """ - db_engine = get_relational_engine() - - async with db_engine.get_async_session() as session: - # Normalize input to list - if isinstance(dataset_names, str): - dataset_names = [dataset_names] - - # Retrieve datasets matching the names and user_id - datasets = ( - await session.scalars( - select(Dataset) - .filter(Dataset.owner_id == user_id) - .filter(Dataset.name.in_(dataset_names)) - ) - ).all() - - # Track results - deleted_ids = [] - found_names = set() - - # Delete each dataset - for dataset in datasets: - await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id) - deleted_ids.append(dataset.id) - found_names.add(dataset.name) - - # Identify datasets that were not found - not_found = [name for name in dataset_names if name not in found_names] - - return { - "deleted_count": len(deleted_ids), - "deleted_ids": deleted_ids, - "not_found": not_found - } \ No newline at end of file