diff --git a/cognee/cli/commands/delete_command.py b/cognee/cli/commands/delete_command.py index 43e8b8e13..46cf41367 100644 --- a/cognee/cli/commands/delete_command.py +++ b/cognee/cli/commands/delete_command.py @@ -6,6 +6,7 @@ from cognee.cli.reference import SupportsCliCommand from cognee.cli import DEFAULT_DOCS_URL import cognee.cli.echo as fmt from cognee.cli.exceptions import CliCommandException, CliCommandInnerException +from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts class DeleteCommand(SupportsCliCommand): @@ -41,7 +42,34 @@ Be careful with deletion operations as they are irreversible. fmt.error("Please specify what to delete: --dataset-name, --user-id, or --all") return - # Build confirmation message + # If --force is used, skip the preview and go straight to deletion + if not args.force: + # --- START PREVIEW LOGIC --- + fmt.echo("Gathering data for preview...") + try: + preview_data = asyncio.run( + get_deletion_counts( + dataset_name=args.dataset_name, + user_id=args.user_id, + all_data=args.all, + ) + ) + except CliCommandException as e: + fmt.error(f"Error occured when fetching preview data: {str(e)}") + return + + if not preview_data: + fmt.success("No data found to delete.") + return + + fmt.echo("You are about to delete:") + fmt.echo( + f"Datasets: {preview_data.datasets}\nEntries: {preview_data.entries}\nUsers: {preview_data.users}" + ) + fmt.echo("-" * 20) + # --- END PREVIEW LOGIC --- + + # Build operation message for success/failure logging if args.all: confirm_msg = "Delete ALL data from cognee?" operation = "all data" @@ -51,8 +79,9 @@ Be careful with deletion operations as they are irreversible. elif args.user_id: confirm_msg = f"Delete all data for user '{args.user_id}'?" operation = f"data for user '{args.user_id}'" + else: + operation = "data" - # Confirm deletion unless forced if not args.force: fmt.warning("This operation is irreversible!") if not fmt.confirm(confirm_msg): @@ -64,6 +93,8 @@ Be careful with deletion operations as they are irreversible. # Run the async delete function async def run_delete(): try: + # NOTE: The underlying cognee.delete() function is currently not working as expected. + # This is a separate bug that this preview feature helps to expose. if args.all: await cognee.delete(dataset_name=None, user_id=args.user_id) else: @@ -72,6 +103,7 @@ Be careful with deletion operations as they are irreversible. raise CliCommandInnerException(f"Failed to delete: {str(e)}") asyncio.run(run_delete()) + # This success message may be inaccurate due to the underlying bug, but we leave it for now. fmt.success(f"Successfully deleted {operation}") except Exception as e: diff --git a/cognee/modules/data/methods/get_deletion_counts.py b/cognee/modules/data/methods/get_deletion_counts.py new file mode 100644 index 000000000..9237a8b85 --- /dev/null +++ b/cognee/modules/data/methods/get_deletion_counts.py @@ -0,0 +1,92 @@ +from uuid import UUID +from cognee.cli.exceptions import CliCommandException +from cognee.infrastructure.databases.exceptions.exceptions import EntityNotFoundError +from sqlalchemy import select +from sqlalchemy.sql import func +from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.data.models import Dataset, Data, DatasetData +from cognee.modules.users.models import User +from cognee.modules.users.methods import get_user +from dataclasses import dataclass + + +@dataclass +class DeletionCountsPreview: + datasets: int = 0 + data_entries: int = 0 + users: int = 0 + + +async def get_deletion_counts( + dataset_name: str = None, user_id: str = None, all_data: bool = False +) -> DeletionCountsPreview: + """ + Calculates the number of items that will be deleted based on the provided arguments. + """ + counts = DeletionCountsPreview() + relational_engine = get_relational_engine() + async with relational_engine.get_async_session() as session: + if dataset_name: + # Find the dataset by name + dataset_result = await session.execute( + select(Dataset).where(Dataset.name == dataset_name) + ) + dataset = dataset_result.scalar_one_or_none() + + if dataset is None: + raise CliCommandException( + f"No Dataset exists with the name {dataset_name}", error_code=1 + ) + + # Count data entries linked to this dataset + count_query = ( + select(func.count()) + .select_from(DatasetData) + .where(DatasetData.dataset_id == dataset.id) + ) + data_entry_count = (await session.execute(count_query)).scalar_one() + counts.users = 1 + counts.datasets = 1 + counts.entries = data_entry_count + return counts + + elif all_data: + # Simplified logic: Get total counts directly from the tables. + counts.datasets = ( + await session.execute(select(func.count()).select_from(Dataset)) + ).scalar_one() + counts.entries = ( + await session.execute(select(func.count()).select_from(Data)) + ).scalar_one() + counts.users = ( + await session.execute(select(func.count()).select_from(User)) + ).scalar_one() + return counts + + # Placeholder for user_id logic + elif user_id: + user = None + try: + user_uuid = UUID(user_id) + user = await get_user(user_uuid) + except (ValueError, EntityNotFoundError): + raise CliCommandException(f"No User exists with ID {user_id}", error_code=1) + counts.users = 1 + # Find all datasets owned by this user + datasets_query = select(Dataset).where(Dataset.owner_id == user.id) + user_datasets = (await session.execute(datasets_query)).scalars().all() + dataset_count = len(user_datasets) + counts.datasets = dataset_count + if dataset_count > 0: + dataset_ids = [d.id for d in user_datasets] + # Count all data entries across all of the user's datasets + data_count_query = ( + select(func.count()) + .select_from(DatasetData) + .where(DatasetData.dataset_id.in_(dataset_ids)) + ) + data_entry_count = (await session.execute(data_count_query)).scalar_one() + counts.entries = data_entry_count + else: + counts.entries = 0 + return counts diff --git a/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py b/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py index e5d6a0c64..7654a781a 100644 --- a/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +++ b/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py @@ -12,7 +12,8 @@ from cognee.cli.commands.search_command import SearchCommand from cognee.cli.commands.cognify_command import CognifyCommand from cognee.cli.commands.delete_command import DeleteCommand from cognee.cli.commands.config_command import ConfigCommand -from cognee.cli.exceptions import CliCommandException, CliCommandInnerException +from cognee.cli.exceptions import CliCommandException +from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview # Mock asyncio.run to properly handle coroutines @@ -282,13 +283,18 @@ class TestDeleteCommand: assert "all" in actions assert "force" in actions + @patch("cognee.cli.commands.delete_command.get_deletion_counts") @patch("cognee.cli.commands.delete_command.fmt.confirm") @patch("cognee.cli.commands.delete_command.asyncio.run", side_effect=_mock_run) - def test_execute_delete_dataset_with_confirmation(self, mock_asyncio_run, mock_confirm): + def test_execute_delete_dataset_with_confirmation( + self, mock_asyncio_run, mock_confirm, mock_get_deletion_counts + ): """Test execute delete dataset with user confirmation""" # Mock the cognee module mock_cognee = MagicMock() mock_cognee.delete = AsyncMock() + mock_get_deletion_counts = AsyncMock() + mock_get_deletion_counts.return_value = DeletionCountsPreview() with patch.dict(sys.modules, {"cognee": mock_cognee}): command = DeleteCommand() @@ -301,13 +307,16 @@ class TestDeleteCommand: command.execute(args) mock_confirm.assert_called_once_with(f"Delete dataset '{args.dataset_name}'?") - mock_asyncio_run.assert_called_once() + assert mock_asyncio_run.call_count == 2 assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0]) mock_cognee.delete.assert_awaited_once_with(dataset_name="test_dataset", user_id=None) + @patch("cognee.cli.commands.delete_command.get_deletion_counts") @patch("cognee.cli.commands.delete_command.fmt.confirm") - def test_execute_delete_cancelled(self, mock_confirm): + def test_execute_delete_cancelled(self, mock_confirm, mock_get_deletion_counts): """Test execute when user cancels deletion""" + mock_get_deletion_counts = AsyncMock() + mock_get_deletion_counts.return_value = DeletionCountsPreview() command = DeleteCommand() args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False) diff --git a/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py b/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py index d4e4b0ee5..de8681be9 100644 --- a/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +++ b/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py @@ -13,6 +13,7 @@ from cognee.cli.commands.cognify_command import CognifyCommand from cognee.cli.commands.delete_command import DeleteCommand from cognee.cli.commands.config_command import ConfigCommand from cognee.cli.exceptions import CliCommandException, CliCommandInnerException +from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview # Mock asyncio.run to properly handle coroutines @@ -396,13 +397,17 @@ class TestDeleteCommandEdgeCases: command.execute(args) mock_confirm.assert_called_once_with("Delete ALL data from cognee?") - mock_asyncio_run.assert_called_once() + assert mock_asyncio_run.call_count == 2 assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0]) mock_cognee.delete.assert_awaited_once_with(dataset_name=None, user_id="test_user") + @patch("cognee.cli.commands.delete_command.get_deletion_counts") @patch("cognee.cli.commands.delete_command.fmt.confirm") - def test_delete_confirmation_keyboard_interrupt(self, mock_confirm): + def test_delete_confirmation_keyboard_interrupt(self, mock_confirm, mock_get_deletion_counts): """Test delete command when user interrupts confirmation""" + mock_get_deletion_counts = AsyncMock() + mock_get_deletion_counts.return_value = DeletionCountsPreview() + command = DeleteCommand() args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False)