Feature/delete preview (#1385)

## Description

This pull request introduces a preview step to the `cognee delete`
command, fulfilling the requirements of issue #1366

When a user runs the delete command, it now first queries the database
to calculate the scope of the deletion and presents a summary (number of
datasets, data entries, users) before asking for final confirmation.
This improves the safety and usability of the command, preventing
accidental data loss.

This PR also adds the `--force` flag to bypass the preview, which is
useful for scripting and automation.

## Type of Change

- [x] New feature (non-breaking change that adds functionality)
- [ ] Bug fix (non-breaking change that fixes an issue)

## Changes Made

- **`cognee/cli/commands/delete_command.py`**: Modified to include the
preview logic. It now calls the counting function, displays the results,
and proceeds with deletion only after confirmation.
- **`cognee/modules/data/methods/get_deletion_counts.py`**: Added this
new file to contain the logic for querying the database and calculating
the deletion counts for datasets, data entries, and users.

## Testing

I have tested the changes through **Manual CLI Testing**: I ran the
`cognee delete` command with the `--dataset-name`, `--user-id`, and
`--all` flags to manually verify that the preview output is correct.

### Terminal Output
Here are screenshots of the command working with the all possible flags:
<img width="1898" height="1087" alt="cognee1"
src="https://github.com/user-attachments/assets/939aa4d0-748c-45e4-a2a6-f5e7982c1fc0"
/>
<img width="1788" height="748" alt="cognee2"
src="https://github.com/user-attachments/assets/213884be-cce1-4007-90f9-5e6d3a302ced"
/>

## Pre-submission Checklist

- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the
issue/feature**
- [x] My code follows the project's coding standards and style
guidelines
- [x] I have added tests that prove my feature works
- [ ] I have not added or changed documentation (as it was not required
for this CLI change)
- [x] I have searched existing PRs to ensure this change has been
submitted already
- [x] I have linked the relevant issue in the description

## Related Issues

Fixes #1366 

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Daulet Amirkhanov 2025-10-02 15:21:57 +01:00 committed by GitHub
commit 2efce6949b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 146 additions and 8 deletions

View file

@ -6,6 +6,7 @@ from cognee.cli.reference import SupportsCliCommand
from cognee.cli import DEFAULT_DOCS_URL
import cognee.cli.echo as fmt
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
class DeleteCommand(SupportsCliCommand):
@ -41,7 +42,34 @@ Be careful with deletion operations as they are irreversible.
fmt.error("Please specify what to delete: --dataset-name, --user-id, or --all")
return
# Build confirmation message
# If --force is used, skip the preview and go straight to deletion
if not args.force:
# --- START PREVIEW LOGIC ---
fmt.echo("Gathering data for preview...")
try:
preview_data = asyncio.run(
get_deletion_counts(
dataset_name=args.dataset_name,
user_id=args.user_id,
all_data=args.all,
)
)
except CliCommandException as e:
fmt.error(f"Error occured when fetching preview data: {str(e)}")
return
if not preview_data:
fmt.success("No data found to delete.")
return
fmt.echo("You are about to delete:")
fmt.echo(
f"Datasets: {preview_data.datasets}\nEntries: {preview_data.entries}\nUsers: {preview_data.users}"
)
fmt.echo("-" * 20)
# --- END PREVIEW LOGIC ---
# Build operation message for success/failure logging
if args.all:
confirm_msg = "Delete ALL data from cognee?"
operation = "all data"
@ -51,8 +79,9 @@ Be careful with deletion operations as they are irreversible.
elif args.user_id:
confirm_msg = f"Delete all data for user '{args.user_id}'?"
operation = f"data for user '{args.user_id}'"
else:
operation = "data"
# Confirm deletion unless forced
if not args.force:
fmt.warning("This operation is irreversible!")
if not fmt.confirm(confirm_msg):
@ -64,6 +93,8 @@ Be careful with deletion operations as they are irreversible.
# Run the async delete function
async def run_delete():
try:
# NOTE: The underlying cognee.delete() function is currently not working as expected.
# This is a separate bug that this preview feature helps to expose.
if args.all:
await cognee.delete(dataset_name=None, user_id=args.user_id)
else:
@ -72,6 +103,7 @@ Be careful with deletion operations as they are irreversible.
raise CliCommandInnerException(f"Failed to delete: {str(e)}")
asyncio.run(run_delete())
# This success message may be inaccurate due to the underlying bug, but we leave it for now.
fmt.success(f"Successfully deleted {operation}")
except Exception as e:

View file

@ -0,0 +1,92 @@
from uuid import UUID
from cognee.cli.exceptions import CliCommandException
from cognee.infrastructure.databases.exceptions.exceptions import EntityNotFoundError
from sqlalchemy import select
from sqlalchemy.sql import func
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models import Dataset, Data, DatasetData
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_user
from dataclasses import dataclass
@dataclass
class DeletionCountsPreview:
datasets: int = 0
data_entries: int = 0
users: int = 0
async def get_deletion_counts(
dataset_name: str = None, user_id: str = None, all_data: bool = False
) -> DeletionCountsPreview:
"""
Calculates the number of items that will be deleted based on the provided arguments.
"""
counts = DeletionCountsPreview()
relational_engine = get_relational_engine()
async with relational_engine.get_async_session() as session:
if dataset_name:
# Find the dataset by name
dataset_result = await session.execute(
select(Dataset).where(Dataset.name == dataset_name)
)
dataset = dataset_result.scalar_one_or_none()
if dataset is None:
raise CliCommandException(
f"No Dataset exists with the name {dataset_name}", error_code=1
)
# Count data entries linked to this dataset
count_query = (
select(func.count())
.select_from(DatasetData)
.where(DatasetData.dataset_id == dataset.id)
)
data_entry_count = (await session.execute(count_query)).scalar_one()
counts.users = 1
counts.datasets = 1
counts.entries = data_entry_count
return counts
elif all_data:
# Simplified logic: Get total counts directly from the tables.
counts.datasets = (
await session.execute(select(func.count()).select_from(Dataset))
).scalar_one()
counts.entries = (
await session.execute(select(func.count()).select_from(Data))
).scalar_one()
counts.users = (
await session.execute(select(func.count()).select_from(User))
).scalar_one()
return counts
# Placeholder for user_id logic
elif user_id:
user = None
try:
user_uuid = UUID(user_id)
user = await get_user(user_uuid)
except (ValueError, EntityNotFoundError):
raise CliCommandException(f"No User exists with ID {user_id}", error_code=1)
counts.users = 1
# Find all datasets owned by this user
datasets_query = select(Dataset).where(Dataset.owner_id == user.id)
user_datasets = (await session.execute(datasets_query)).scalars().all()
dataset_count = len(user_datasets)
counts.datasets = dataset_count
if dataset_count > 0:
dataset_ids = [d.id for d in user_datasets]
# Count all data entries across all of the user's datasets
data_count_query = (
select(func.count())
.select_from(DatasetData)
.where(DatasetData.dataset_id.in_(dataset_ids))
)
data_entry_count = (await session.execute(data_count_query)).scalar_one()
counts.entries = data_entry_count
else:
counts.entries = 0
return counts

View file

@ -12,7 +12,8 @@ from cognee.cli.commands.search_command import SearchCommand
from cognee.cli.commands.cognify_command import CognifyCommand
from cognee.cli.commands.delete_command import DeleteCommand
from cognee.cli.commands.config_command import ConfigCommand
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
from cognee.cli.exceptions import CliCommandException
from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview
# Mock asyncio.run to properly handle coroutines
@ -282,13 +283,18 @@ class TestDeleteCommand:
assert "all" in actions
assert "force" in actions
@patch("cognee.cli.commands.delete_command.get_deletion_counts")
@patch("cognee.cli.commands.delete_command.fmt.confirm")
@patch("cognee.cli.commands.delete_command.asyncio.run", side_effect=_mock_run)
def test_execute_delete_dataset_with_confirmation(self, mock_asyncio_run, mock_confirm):
def test_execute_delete_dataset_with_confirmation(
self, mock_asyncio_run, mock_confirm, mock_get_deletion_counts
):
"""Test execute delete dataset with user confirmation"""
# Mock the cognee module
mock_cognee = MagicMock()
mock_cognee.delete = AsyncMock()
mock_get_deletion_counts = AsyncMock()
mock_get_deletion_counts.return_value = DeletionCountsPreview()
with patch.dict(sys.modules, {"cognee": mock_cognee}):
command = DeleteCommand()
@ -301,13 +307,16 @@ class TestDeleteCommand:
command.execute(args)
mock_confirm.assert_called_once_with(f"Delete dataset '{args.dataset_name}'?")
mock_asyncio_run.assert_called_once()
assert mock_asyncio_run.call_count == 2
assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
mock_cognee.delete.assert_awaited_once_with(dataset_name="test_dataset", user_id=None)
@patch("cognee.cli.commands.delete_command.get_deletion_counts")
@patch("cognee.cli.commands.delete_command.fmt.confirm")
def test_execute_delete_cancelled(self, mock_confirm):
def test_execute_delete_cancelled(self, mock_confirm, mock_get_deletion_counts):
"""Test execute when user cancels deletion"""
mock_get_deletion_counts = AsyncMock()
mock_get_deletion_counts.return_value = DeletionCountsPreview()
command = DeleteCommand()
args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False)

View file

@ -13,6 +13,7 @@ from cognee.cli.commands.cognify_command import CognifyCommand
from cognee.cli.commands.delete_command import DeleteCommand
from cognee.cli.commands.config_command import ConfigCommand
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview
# Mock asyncio.run to properly handle coroutines
@ -396,13 +397,17 @@ class TestDeleteCommandEdgeCases:
command.execute(args)
mock_confirm.assert_called_once_with("Delete ALL data from cognee?")
mock_asyncio_run.assert_called_once()
assert mock_asyncio_run.call_count == 2
assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
mock_cognee.delete.assert_awaited_once_with(dataset_name=None, user_id="test_user")
@patch("cognee.cli.commands.delete_command.get_deletion_counts")
@patch("cognee.cli.commands.delete_command.fmt.confirm")
def test_delete_confirmation_keyboard_interrupt(self, mock_confirm):
def test_delete_confirmation_keyboard_interrupt(self, mock_confirm, mock_get_deletion_counts):
"""Test delete command when user interrupts confirmation"""
mock_get_deletion_counts = AsyncMock()
mock_get_deletion_counts.return_value = DeletionCountsPreview()
command = DeleteCommand()
args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False)