From 3a05cca74bde5db2fa02e66858e1c2f2d88b5aa9 Mon Sep 17 00:00:00 2001 From: rajeevrajeshuni Date: Sat, 29 Nov 2025 23:56:11 +0530 Subject: [PATCH] WIP - implementing the missing delete data functionality --- cognee/cli/commands/delete_command.py | 31 +++++--- cognee/cli/tui/delete_screen.py | 26 +++++-- cognee/modules/data/methods/__init__.py | 2 + .../data/methods/delete_data_by_user.py | 71 +++++++++++++++++++ .../data/methods/delete_datasets_by_name.py | 57 +++++++++++++++ 5 files changed, 171 insertions(+), 16 deletions(-) create mode 100644 cognee/modules/data/methods/delete_data_by_user.py create mode 100644 cognee/modules/data/methods/delete_datasets_by_name.py diff --git a/cognee/cli/commands/delete_command.py b/cognee/cli/commands/delete_command.py index 8400d3b0f..9079b4ca3 100644 --- a/cognee/cli/commands/delete_command.py +++ b/cognee/cli/commands/delete_command.py @@ -1,12 +1,14 @@ import argparse import asyncio -from typing import Optional - +from uuid import UUID from cognee.cli.reference import SupportsCliCommand from cognee.cli import DEFAULT_DOCS_URL import cognee.cli.echo as fmt from cognee.cli.exceptions import CliCommandException, CliCommandInnerException from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts +from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name +from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user +from cognee.modules.users.methods import get_default_user class DeleteCommand(SupportsCliCommand): @@ -93,18 +95,27 @@ Be careful with deletion operations as they are irreversible. # Run the async delete function async def run_delete(): try: - # NOTE: The underlying cognee.delete() function is currently not working as expected. - # This is a separate bug that this preview feature helps to expose. 
- if args.all: - await cognee.delete(dataset_name=None, user_id=args.user_id) + if args.dataset_name: + # Use delete_datasets_by_name for dataset deletion + user = await get_default_user() + result = await delete_datasets_by_name(args.dataset_name, user.id) + + if result["not_found"]: + fmt.warning(f"Dataset '{args.dataset_name}' not found") + return False + + fmt.success(f"Successfully deleted {result['deleted_count']} dataset(s)") + return True else: - await cognee.delete(dataset_name=args.dataset_name, user_id=args.user_id) + # For user_id deletion, use the original cognee.delete + result = await delete_data_by_user(UUID(args.user_id)) except Exception as e: raise CliCommandInnerException(f"Failed to delete: {str(e)}") from e + return True - asyncio.run(run_delete()) - # This success message may be inaccurate due to the underlying bug, but we leave it for now. - fmt.success(f"Successfully deleted {operation}") + success = asyncio.run(run_delete()) + if success and not args.dataset_name: + fmt.success(f"Successfully deleted {operation}") except Exception as e: if isinstance(e, CliCommandInnerException): diff --git a/cognee/cli/tui/delete_screen.py b/cognee/cli/tui/delete_screen.py index 2bb1155b1..bb08a3a5f 100644 --- a/cognee/cli/tui/delete_screen.py +++ b/cognee/cli/tui/delete_screen.py @@ -1,11 +1,14 @@ import asyncio -import cognee +from uuid import UUID from textual.app import ComposeResult from textual.widgets import Input, Button, Static, Label from textual.containers import Container, Vertical, Horizontal from textual.binding import Binding from cognee.cli.tui.base_screen import BaseTUIScreen from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts +from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name +from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user +from cognee.modules.users.methods import get_default_user class DeleteTUIScreen(BaseTUIScreen): @@ -145,10 +148,21 @@ 
class DeleteTUIScreen(BaseTUIScreen): status.update(preview_msg) # Perform deletion - await cognee.delete(dataset_name=dataset_name, user_id=user_id) - - operation = f"dataset '{dataset_name}'" if dataset_name else f"data for user '{user_id}'" - status.update(f"✓ Successfully deleted {operation}") + if dataset_name: + # Use delete_datasets_by_name for dataset deletion + user = await get_default_user() + result = await delete_datasets_by_name(dataset_name, user.id) + + if result["not_found"]: + status.update(f"⚠️ Dataset '{dataset_name}' not found") + self.is_processing = False + return + + status.update(f"✓ Successfully deleted {result['deleted_count']} dataset(s)") + else: + # For user_id deletion, use the new delete_data_by_user method + result = await delete_data_by_user(UUID(user_id)) + status.update(f"✓ Successfully deleted {result['datasets_deleted']} datasets and {result['data_entries_deleted']} data entries for user '{user_id}'") except Exception as e: status.update(f"✗ Error: {str(e)}") @@ -194,7 +208,7 @@ class DeleteTUIScreen(BaseTUIScreen): ) status.update(preview_msg) - # Perform deletion + # Perform deletion - delete all uses the original cognee.delete import cognee await cognee.delete(dataset_name=None, user_id=None) diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py index 7936a9afd..34b58590d 100644 --- a/cognee/modules/data/methods/__init__.py +++ b/cognee/modules/data/methods/__init__.py @@ -16,6 +16,8 @@ from .get_dataset_ids import get_dataset_ids # Delete from .delete_dataset import delete_dataset +from .delete_datasets_by_name import delete_datasets_by_name +from .delete_data_by_user import delete_data_by_user from .delete_data import delete_data # Create diff --git a/cognee/modules/data/methods/delete_data_by_user.py b/cognee/modules/data/methods/delete_data_by_user.py new file mode 100644 index 000000000..258321b78 --- /dev/null +++ b/cognee/modules/data/methods/delete_data_by_user.py @@ -0,0 +1,71 
@@
+from uuid import UUID
+from sqlalchemy import select, delete as sql_delete
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.data.models import Dataset, DatasetData
+from cognee.modules.users.methods import get_user
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
+
+
+async def delete_data_by_user(user_id: UUID) -> dict[str, int]:
+    """
+    Delete every dataset owned by a user, together with its dataset-data links.
+
+    Only relational rows are removed here: the ``Dataset`` rows owned by
+    ``user_id`` and the ``DatasetData`` rows that reference those datasets.
+    Nothing else is deleted explicitly by this function.
+
+    Args:
+        user_id: UUID of the user whose datasets should be deleted
+
+    Returns:
+        Dictionary containing deletion statistics:
+        - datasets_deleted: Number of datasets deleted
+        - data_entries_deleted: Number of dataset-data links deleted
+
+    Raises:
+        ValueError: If user is not found
+    """
+    db_engine = get_relational_engine()
+
+    # Verify the user exists before opening the session used for deletion.
+    user = await get_user(user_id)
+    if not user:
+        raise ValueError(f"User with ID {user_id} not found")
+
+    datasets_deleted = 0
+    data_entries_deleted = 0
+
+    async with db_engine.get_async_session() as session:
+        # Get all datasets owned by this user
+        datasets_query = select(Dataset).where(Dataset.owner_id == user_id)
+        user_datasets = (await session.execute(datasets_query)).scalars().all()
+
+        for dataset in user_datasets:
+            # Load the links only to report how many are being removed.
+            links_query = select(DatasetData).where(DatasetData.dataset_id == dataset.id)
+            dataset_data_links = (await session.execute(links_query)).scalars().all()
+
+            # Remove this dataset's links in one statement, keyed on the same
+            # column used to find them rather than on a per-row `id` attribute.
+            await session.execute(
+                sql_delete(DatasetData).where(DatasetData.dataset_id == dataset.id)
+            )
+            data_entries_deleted += len(dataset_data_links)
+
+            # Delete the dataset itself after its links are gone.
+            await session.execute(sql_delete(Dataset).where(Dataset.id == dataset.id))
+            datasets_deleted += 1
+
+        # Commit once, after every delete has been issued.
+        await session.commit()
+
+    logger.info(
+        f"Deleted {datasets_deleted} datasets and {data_entries_deleted} data entries for user {user_id}"
+    )
+
+    return {
+        "datasets_deleted": datasets_deleted,
+        "data_entries_deleted": data_entries_deleted,
+    }
diff --git a/cognee/modules/data/methods/delete_datasets_by_name.py b/cognee/modules/data/methods/delete_datasets_by_name.py
new file mode 100644
index 000000000..abccd8f7c
--- /dev/null
+++ b/cognee/modules/data/methods/delete_datasets_by_name.py
@@ -0,0 +1,59 @@
+from typing import Any, Union
+from uuid import UUID
+from sqlalchemy import select
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ..models import Dataset
+
+
+async def delete_datasets_by_name(
+    dataset_names: Union[str, list[str]], user_id: UUID
+) -> dict[str, Any]:
+    """
+    Delete datasets by name for a specific user.
+
+    Args:
+        dataset_names: Single dataset name or list of dataset names to delete;
+            a name given more than once is processed once
+        user_id: UUID of the dataset owner
+
+    Returns:
+        Dictionary containing:
+        - deleted_count: Number of datasets deleted
+        - deleted_ids: List of deleted dataset IDs
+        - not_found: List of dataset names that were not found
+    """
+    # Normalize input to a list of unique names, keeping the caller's order.
+    if isinstance(dataset_names, str):
+        dataset_names = [dataset_names]
+    requested_names = list(dict.fromkeys(dataset_names))
+
+    db_engine = get_relational_engine()
+
+    # Track results
+    deleted_ids = []
+    found_names = set()
+
+    async with db_engine.get_async_session() as session:
+        # Retrieve datasets matching the names and user_id
+        datasets = (
+            await session.scalars(
+                select(Dataset)
+                .filter(Dataset.owner_id == user_id)
+                .filter(Dataset.name.in_(requested_names))
+            )
+        ).all()
+
+        # Delete each dataset
+        for dataset in datasets:
+            await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
+            deleted_ids.append(dataset.id)
+            found_names.add(dataset.name)
+
+    # Identify datasets that were not found
+    not_found = [name for name in requested_names if name not in found_names]
+
+    return {
+        "deleted_count": len(deleted_ids),
+        "deleted_ids": deleted_ids,
+        "not_found": not_found,
+    }