WIP - implementing the missing delete data functionality
This commit is contained in:
parent
7224074b6c
commit
3a05cca74b
5 changed files with 171 additions and 16 deletions
|
|
@ -1,12 +1,14 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
from uuid import UUID
|
||||
from cognee.cli.reference import SupportsCliCommand
|
||||
from cognee.cli import DEFAULT_DOCS_URL
|
||||
import cognee.cli.echo as fmt
|
||||
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
|
||||
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
|
||||
from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name
|
||||
from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
|
||||
|
||||
class DeleteCommand(SupportsCliCommand):
|
||||
|
|
@ -93,18 +95,27 @@ Be careful with deletion operations as they are irreversible.
|
|||
# Run the async delete function
|
||||
async def run_delete():
|
||||
try:
|
||||
# NOTE: The underlying cognee.delete() function is currently not working as expected.
|
||||
# This is a separate bug that this preview feature helps to expose.
|
||||
if args.all:
|
||||
await cognee.delete(dataset_name=None, user_id=args.user_id)
|
||||
if args.dataset_name:
|
||||
# Use delete_datasets_by_name for dataset deletion
|
||||
user = await get_default_user()
|
||||
result = await delete_datasets_by_name(args.dataset_name, user.id)
|
||||
|
||||
if result["not_found"]:
|
||||
fmt.warning(f"Dataset '{args.dataset_name}' not found")
|
||||
return False
|
||||
|
||||
fmt.success(f"Successfully deleted {result['deleted_count']} dataset(s)")
|
||||
return True
|
||||
else:
|
||||
await cognee.delete(dataset_name=args.dataset_name, user_id=args.user_id)
|
||||
# For user_id deletion, use the delete_data_by_user method
|
||||
result = await delete_data_by_user(UUID(args.user_id))
|
||||
except Exception as e:
|
||||
raise CliCommandInnerException(f"Failed to delete: {str(e)}") from e
|
||||
return True
|
||||
|
||||
asyncio.run(run_delete())
|
||||
# This success message may be inaccurate due to the underlying bug, but we leave it for now.
|
||||
fmt.success(f"Successfully deleted {operation}")
|
||||
success = asyncio.run(run_delete())
|
||||
if success and not args.dataset_name:
|
||||
fmt.success(f"Successfully deleted {operation}")
|
||||
|
||||
except Exception as e:
|
||||
if isinstance(e, CliCommandInnerException):
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
import asyncio
|
||||
import cognee
|
||||
from uuid import UUID
|
||||
from textual.app import ComposeResult
|
||||
from textual.widgets import Input, Button, Static, Label
|
||||
from textual.containers import Container, Vertical, Horizontal
|
||||
from textual.binding import Binding
|
||||
from cognee.cli.tui.base_screen import BaseTUIScreen
|
||||
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
|
||||
from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name
|
||||
from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
|
||||
|
||||
class DeleteTUIScreen(BaseTUIScreen):
|
||||
|
|
@ -145,10 +148,21 @@ class DeleteTUIScreen(BaseTUIScreen):
|
|||
status.update(preview_msg)
|
||||
|
||||
# Perform deletion
|
||||
await cognee.delete(dataset_name=dataset_name, user_id=user_id)
|
||||
|
||||
operation = f"dataset '{dataset_name}'" if dataset_name else f"data for user '{user_id}'"
|
||||
status.update(f"✓ Successfully deleted {operation}")
|
||||
if dataset_name:
|
||||
# Use delete_datasets_by_name for dataset deletion
|
||||
user = await get_default_user()
|
||||
result = await delete_datasets_by_name(dataset_name, user.id)
|
||||
|
||||
if result["not_found"]:
|
||||
status.update(f"⚠️ Dataset '{dataset_name}' not found")
|
||||
self.is_processing = False
|
||||
return
|
||||
|
||||
status.update(f"✓ Successfully deleted {result['deleted_count']} dataset(s)")
|
||||
else:
|
||||
# For user_id deletion, use the new delete_data_by_user method
|
||||
result = await delete_data_by_user(UUID(user_id))
|
||||
status.update(f"✓ Successfully deleted {result['datasets_deleted']} datasets and {result['data_entries_deleted']} data entries for user '{user_id}'")
|
||||
|
||||
except Exception as e:
|
||||
status.update(f"✗ Error: {str(e)}")
|
||||
|
|
@ -194,7 +208,7 @@ class DeleteTUIScreen(BaseTUIScreen):
|
|||
)
|
||||
status.update(preview_msg)
|
||||
|
||||
# Perform deletion
|
||||
# Perform deletion - delete all uses the original cognee.delete
|
||||
import cognee
|
||||
await cognee.delete(dataset_name=None, user_id=None)
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ from .get_dataset_ids import get_dataset_ids
|
|||
|
||||
# Delete
|
||||
from .delete_dataset import delete_dataset
|
||||
from .delete_datasets_by_name import delete_datasets_by_name
|
||||
from .delete_data_by_user import delete_data_by_user
|
||||
from .delete_data import delete_data
|
||||
|
||||
# Create
|
||||
|
|
|
|||
71
cognee/modules/data/methods/delete_data_by_user.py
Normal file
71
cognee/modules/data/methods/delete_data_by_user.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import select, delete as sql_delete
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.data.models import Dataset, DatasetData
|
||||
from cognee.modules.users.methods import get_user
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def delete_data_by_user(user_id: UUID) -> dict[str, int]:
    """
    Delete all datasets owned by a user together with their dataset-data links.

    Only ``Dataset`` rows whose ``owner_id`` matches and the ``DatasetData``
    rows pointing at those datasets are removed by this function.
    NOTE(review): no other table is touched here; whether further records are
    removed (e.g. through database-level cascades) cannot be told from this
    function - confirm before relying on it for a full purge.

    Args:
        user_id: UUID of the user whose datasets should be deleted

    Returns:
        Dictionary containing deletion statistics:
        - datasets_deleted: Number of datasets deleted
        - data_entries_deleted: Number of dataset-data link rows deleted

    Raises:
        ValueError: If user is not found
    """
    db_engine = get_relational_engine()

    # Verify the user exists before a session is opened for the deletes.
    user = await get_user(user_id)
    if not user:
        raise ValueError(f"User with ID {user_id} not found")

    async with db_engine.get_async_session() as session:
        # Get all datasets owned by this user
        datasets_query = select(Dataset).where(Dataset.owner_id == user_id)
        user_datasets = (await session.execute(datasets_query)).scalars().all()

        datasets_deleted = 0
        data_entries_deleted = 0

        for dataset in user_datasets:
            # Count the links first so the statistics stay exact, then remove
            # them with a single statement keyed on dataset_id instead of
            # issuing one DELETE per link row.
            links_query = select(DatasetData.dataset_id).where(
                DatasetData.dataset_id == dataset.id
            )
            link_rows = (await session.execute(links_query)).scalars().all()

            await session.execute(
                sql_delete(DatasetData).where(DatasetData.dataset_id == dataset.id)
            )
            data_entries_deleted += len(link_rows)

            # Delete the dataset itself once nothing links to it any more.
            await session.execute(
                sql_delete(Dataset).where(Dataset.id == dataset.id)
            )
            datasets_deleted += 1

        # Commit all changes
        await session.commit()

    logger.info(f"Deleted {datasets_deleted} datasets and {data_entries_deleted} data entries for user {user_id}")

    return {
        "datasets_deleted": datasets_deleted,
        "data_entries_deleted": data_entries_deleted,
    }
|
||||
57
cognee/modules/data/methods/delete_datasets_by_name.py
Normal file
57
cognee/modules/data/methods/delete_datasets_by_name.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
from typing import Any, Union
from uuid import UUID

from sqlalchemy import select

from cognee.infrastructure.databases.relational import get_relational_engine
from ..models import Dataset
|
||||
|
||||
|
||||
async def delete_datasets_by_name(
    dataset_names: Union[str, list[str]], user_id: UUID
) -> dict[str, Any]:
    """
    Delete datasets by name for a specific user.

    Args:
        dataset_names: Single dataset name or list of dataset names to delete
        user_id: UUID of the dataset owner

    Returns:
        Dictionary containing:
        - deleted_count: Number of datasets deleted
        - deleted_ids: List of deleted dataset IDs
        - not_found: List of requested dataset names that matched no dataset
          owned by ``user_id``
    """
    # Normalize input to a list before any database work is started.
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]

    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        # Retrieve datasets matching the names and user_id
        datasets = (
            await session.scalars(
                select(Dataset)
                .filter(Dataset.owner_id == user_id)
                .filter(Dataset.name.in_(dataset_names))
            )
        ).all()

        # Track results
        deleted_ids = []
        found_names = set()

        # Delete each dataset through the engine's delete-by-id helper.
        for dataset in datasets:
            await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
            deleted_ids.append(dataset.id)
            found_names.add(dataset.name)

        # Identify requested names for which no dataset was found
        not_found = [name for name in dataset_names if name not in found_names]

        return {
            "deleted_count": len(deleted_ids),
            "deleted_ids": deleted_ids,
            "not_found": not_found,
        }
|
||||
Loading…
Add table
Reference in a new issue