WIP - implementing the missing delete data functionality

This commit is contained in:
rajeevrajeshuni 2025-11-29 23:56:11 +05:30
parent 7224074b6c
commit 3a05cca74b
5 changed files with 171 additions and 16 deletions

View file

@ -1,12 +1,14 @@
import argparse
import asyncio
from typing import Optional
from uuid import UUID
from cognee.cli.reference import SupportsCliCommand
from cognee.cli import DEFAULT_DOCS_URL
import cognee.cli.echo as fmt
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name
from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user
from cognee.modules.users.methods import get_default_user
class DeleteCommand(SupportsCliCommand):
@ -93,18 +95,27 @@ Be careful with deletion operations as they are irreversible.
# Run the async delete function
async def run_delete():
try:
# NOTE: The underlying cognee.delete() function is currently not working as expected.
# This is a separate bug that this preview feature helps to expose.
if args.all:
await cognee.delete(dataset_name=None, user_id=args.user_id)
if args.dataset_name:
# Use delete_datasets_by_name for dataset deletion
user = await get_default_user()
result = await delete_datasets_by_name(args.dataset_name, user.id)
if result["not_found"]:
fmt.warning(f"Dataset '{args.dataset_name}' not found")
return False
fmt.success(f"Successfully deleted {result['deleted_count']} dataset(s)")
return True
else:
await cognee.delete(dataset_name=args.dataset_name, user_id=args.user_id)
# For user_id deletion, use the new delete_data_by_user method
result = await delete_data_by_user(UUID(args.user_id))
except Exception as e:
raise CliCommandInnerException(f"Failed to delete: {str(e)}") from e
return True
asyncio.run(run_delete())
# This success message may be inaccurate due to the underlying bug, but we leave it for now.
fmt.success(f"Successfully deleted {operation}")
success = asyncio.run(run_delete())
if success and not args.dataset_name:
fmt.success(f"Successfully deleted {operation}")
except Exception as e:
if isinstance(e, CliCommandInnerException):

View file

@ -1,11 +1,14 @@
import asyncio
import cognee
from uuid import UUID
from textual.app import ComposeResult
from textual.widgets import Input, Button, Static, Label
from textual.containers import Container, Vertical, Horizontal
from textual.binding import Binding
from cognee.cli.tui.base_screen import BaseTUIScreen
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name
from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user
from cognee.modules.users.methods import get_default_user
class DeleteTUIScreen(BaseTUIScreen):
@ -145,10 +148,21 @@ class DeleteTUIScreen(BaseTUIScreen):
status.update(preview_msg)
# Perform deletion
await cognee.delete(dataset_name=dataset_name, user_id=user_id)
operation = f"dataset '{dataset_name}'" if dataset_name else f"data for user '{user_id}'"
status.update(f"✓ Successfully deleted {operation}")
if dataset_name:
# Use delete_datasets_by_name for dataset deletion
user = await get_default_user()
result = await delete_datasets_by_name(dataset_name, user.id)
if result["not_found"]:
status.update(f"⚠️ Dataset '{dataset_name}' not found")
self.is_processing = False
return
status.update(f"✓ Successfully deleted {result['deleted_count']} dataset(s)")
else:
# For user_id deletion, use the new delete_data_by_user method
result = await delete_data_by_user(UUID(user_id))
status.update(f"✓ Successfully deleted {result['datasets_deleted']} datasets and {result['data_entries_deleted']} data entries for user '{user_id}'")
except Exception as e:
status.update(f"✗ Error: {str(e)}")
@ -194,7 +208,7 @@ class DeleteTUIScreen(BaseTUIScreen):
)
status.update(preview_msg)
# Perform deletion
# Perform deletion - delete all uses the original cognee.delete
import cognee
await cognee.delete(dataset_name=None, user_id=None)

View file

@ -16,6 +16,8 @@ from .get_dataset_ids import get_dataset_ids
# Delete
from .delete_dataset import delete_dataset
from .delete_datasets_by_name import delete_datasets_by_name
from .delete_data_by_user import delete_data_by_user
from .delete_data import delete_data
# Create

View file

@ -0,0 +1,71 @@
from uuid import UUID
from sqlalchemy import select, delete as sql_delete
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models import Dataset, DatasetData
from cognee.modules.users.methods import get_user
from cognee.shared.logging_utils import get_logger
logger = get_logger()
async def delete_data_by_user(user_id: UUID) -> dict[str, int]:
    """
    Delete every dataset owned by a user, together with its dataset-data links.

    Two kinds of rows are removed by this function: ``DatasetData`` rows whose
    ``dataset_id`` refers to one of the user's datasets, and the ``Dataset``
    rows themselves. No other tables are modified here.

    Args:
        user_id: UUID of the user whose datasets should be deleted

    Returns:
        Dictionary containing deletion statistics:
            - datasets_deleted: Number of datasets deleted
            - data_entries_deleted: Number of dataset-data link rows that
              were found for those datasets and deleted

    Raises:
        ValueError: If ``get_user`` returns a falsy value for ``user_id``
    """
    # Verify the user exists first so that a bad ID fails before any
    # session is opened or any row is touched.
    user = await get_user(user_id)
    if not user:
        raise ValueError(f"User with ID {user_id} not found")

    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        # Get all datasets owned by this user
        datasets_query = select(Dataset).where(Dataset.owner_id == user_id)
        user_datasets = (await session.execute(datasets_query)).scalars().all()

        datasets_deleted = 0
        data_entries_deleted = 0

        for dataset in user_datasets:
            belongs_to_dataset = DatasetData.dataset_id == dataset.id

            # Count the links up front so the returned statistics stay accurate.
            dataset_data_links = (
                await session.execute(select(DatasetData).where(belongs_to_dataset))
            ).scalars().all()
            data_entries_deleted += len(dataset_data_links)

            # Remove all links of this dataset with a single set-based DELETE
            # instead of issuing one statement per link row. This also avoids
            # depending on the link model exposing its own ``id`` attribute.
            await session.execute(sql_delete(DatasetData).where(belongs_to_dataset))

            # Delete the dataset itself once nothing references it any more.
            await session.execute(sql_delete(Dataset).where(Dataset.id == dataset.id))
            datasets_deleted += 1

        # Commit all changes in one transaction
        await session.commit()

    logger.info(f"Deleted {datasets_deleted} datasets and {data_entries_deleted} data entries for user {user_id}")

    return {
        "datasets_deleted": datasets_deleted,
        "data_entries_deleted": data_entries_deleted,
    }

View file

@ -0,0 +1,57 @@
from typing import Any, Union
from uuid import UUID

from sqlalchemy import select

from cognee.infrastructure.databases.relational import get_relational_engine

from ..models import Dataset
async def delete_datasets_by_name(
    dataset_names: Union[str, list[str]], user_id: UUID
) -> dict[str, Any]:
    """
    Delete datasets by name for a specific user.

    Only datasets whose ``owner_id`` equals ``user_id`` are considered; each
    match is removed through the relational engine's ``delete_entity_by_id``.

    Args:
        dataset_names: Single dataset name or list of dataset names to delete
        user_id: UUID of the dataset owner

    Returns:
        Dictionary containing:
            - deleted_count: Number of datasets deleted
            - deleted_ids: List of deleted dataset IDs
            - not_found: List of requested dataset names that matched no
              dataset owned by the user (empty when every name matched)
    """
    # Normalize input to a list. A bare string must be wrapped, otherwise
    # the "not found" pass below would iterate over its characters.
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]

    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        # Retrieve datasets matching the names and user_id
        datasets = (
            await session.scalars(
                select(Dataset)
                .filter(Dataset.owner_id == user_id)
                .filter(Dataset.name.in_(dataset_names))
            )
        ).all()

        # Track results
        deleted_ids = []
        found_names = set()

        # Delete each dataset
        for dataset in datasets:
            await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
            deleted_ids.append(dataset.id)
            found_names.add(dataset.name)

        # Identify requested names that matched nothing, in request order
        not_found = [name for name in dataset_names if name not in found_names]

        return {
            "deleted_count": len(deleted_ids),
            "deleted_ids": deleted_ids,
            "not_found": not_found,
        }