cleanup of delete dataset methods
This commit is contained in:
parent
3a05cca74b
commit
40846df92d
8 changed files with 72 additions and 125 deletions
|
|
@ -6,7 +6,7 @@ from cognee.cli import DEFAULT_DOCS_URL
|
|||
import cognee.cli.echo as fmt
|
||||
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
|
||||
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
|
||||
from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name
|
||||
from cognee.modules.data.methods.delete_dataset_by_name import delete_datasets_by_name
|
||||
from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from textual.containers import Container, Vertical, Horizontal
|
|||
from textual.binding import Binding
|
||||
from cognee.cli.tui.base_screen import BaseTUIScreen
|
||||
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
|
||||
from cognee.modules.data.methods.delete_datasets_by_name import delete_datasets_by_name
|
||||
from cognee.modules.data.methods.delete_dataset_by_name import delete_datasets_by_name
|
||||
from cognee.modules.data.methods.delete_data_by_user import delete_data_by_user
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@ import asyncio
|
|||
from os import path
|
||||
import tempfile
|
||||
from uuid import UUID
|
||||
from typing import Optional
|
||||
from typing import AsyncGenerator, List
|
||||
from typing import Optional, AsyncGenerator, List, Union
|
||||
from contextlib import asynccontextmanager
|
||||
from sqlalchemy.orm import joinedload
|
||||
from sqlalchemy.exc import NoResultFound
|
||||
|
|
@ -235,35 +234,45 @@ class SQLAlchemyAdapter:
|
|||
return [schema[0] for schema in result.fetchall()]
|
||||
return []
|
||||
|
||||
async def delete_entity_by_id(
|
||||
self, table_name: str, data_id: UUID, schema_name: Optional[str] = "public"
|
||||
async def delete_entities_by_id(
|
||||
self,
|
||||
table_name: str,
|
||||
data_id: Union[UUID, List[UUID]], # Supports a single UUID or a List of UUIDs
|
||||
schema_name: Optional[str] = "public"
|
||||
):
|
||||
"""
|
||||
Delete an entity from the specified table based on its unique ID.
|
||||
Delete one or more entities from the specified table based on their ID(s).
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- table_name (str): The name of the table from which to delete the entity.
|
||||
- data_id (UUID): The unique identifier of the entity to be deleted.
|
||||
- schema_name (Optional[str]): The name of the schema where the table resides,
|
||||
defaults to 'public'. (default 'public')
|
||||
- table_name (str): The name of the table from which to delete the entities.
|
||||
- data_id (Union[UUID, List[UUID]]): The unique identifier(s) to be deleted.
|
||||
- schema_name (Optional[str]): The name of the schema where the table resides.
|
||||
"""
|
||||
if self.engine.dialect.name == "sqlite":
|
||||
async with self.get_async_session() as session:
|
||||
TableModel = await self.get_table(table_name, schema_name)
|
||||
|
||||
# Foreign key constraints are disabled by default in SQLite (for backwards compatibility),
|
||||
# so must be enabled for each database connection/session separately.
|
||||
# Ensure data_ids is a list for the WHERE clause logic
|
||||
if isinstance(data_id, list):
|
||||
data_ids_to_delete = data_id
|
||||
else:
|
||||
data_ids_to_delete = [data_id]
|
||||
|
||||
if not data_ids_to_delete:
|
||||
return
|
||||
|
||||
async with self.get_async_session() as session:
|
||||
TableModel = await self.get_table(table_name, schema_name)
|
||||
|
||||
# Handle SQLite's foreign key requirement
|
||||
if self.engine.dialect.name == "sqlite":
|
||||
from sqlalchemy import text
|
||||
await session.execute(text("PRAGMA foreign_keys = ON;"))
|
||||
|
||||
await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
|
||||
await session.commit()
|
||||
else:
|
||||
async with self.get_async_session() as session:
|
||||
TableModel = await self.get_table(table_name, schema_name)
|
||||
await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
|
||||
await session.commit()
|
||||
# Construct the DELETE statement using the 'in_()' operator
|
||||
stmt = TableModel.delete().where(TableModel.c.id.in_(data_ids_to_delete))
|
||||
|
||||
# Execute and commit
|
||||
await session.execute(stmt)
|
||||
await session.commit()
|
||||
|
||||
async def delete_data_entity(self, data_id: UUID):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ from .get_dataset_ids import get_dataset_ids
|
|||
|
||||
# Delete
|
||||
from .delete_dataset import delete_dataset
|
||||
from .delete_datasets_by_name import delete_datasets_by_name
|
||||
from .delete_dataset_by_name import delete_datasets_by_name
|
||||
from .delete_data_by_user import delete_data_by_user
|
||||
from .delete_data import delete_data
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import select, delete as sql_delete
|
||||
from sqlalchemy import select
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.data.models import Dataset, DatasetData
|
||||
from cognee.modules.data.models import Dataset
|
||||
from cognee.modules.users.methods import get_user
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def delete_data_by_user(user_id: UUID) -> dict[str, int]:
|
||||
async def delete_data_by_user(user_id: UUID):
|
||||
"""
|
||||
Delete all datasets and their associated data for a specific user.
|
||||
|
||||
|
|
@ -18,11 +18,6 @@ async def delete_data_by_user(user_id: UUID) -> dict[str, int]:
|
|||
Args:
|
||||
user_id: UUID of the user whose data should be deleted
|
||||
|
||||
Returns:
|
||||
Dictionary containing deletion statistics:
|
||||
- datasets_deleted: Number of datasets deleted
|
||||
- data_entries_deleted: Number of data entries deleted
|
||||
|
||||
Raises:
|
||||
ValueError: If user is not found
|
||||
"""
|
||||
|
|
@ -35,37 +30,7 @@ async def delete_data_by_user(user_id: UUID) -> dict[str, int]:
|
|||
raise ValueError(f"User with ID {user_id} not found")
|
||||
|
||||
# Get all datasets owned by this user
|
||||
datasets_query = select(Dataset).where(Dataset.owner_id == user_id)
|
||||
user_datasets = (await session.execute(datasets_query)).scalars().all()
|
||||
|
||||
datasets_deleted = 0
|
||||
data_entries_deleted = 0
|
||||
|
||||
# Delete each dataset and its data
|
||||
for dataset in user_datasets:
|
||||
# Get all data entries in this dataset
|
||||
data_query = select(DatasetData).where(DatasetData.dataset_id == dataset.id)
|
||||
dataset_data_links = (await session.execute(data_query)).scalars().all()
|
||||
|
||||
# Delete dataset-data links
|
||||
for link in dataset_data_links:
|
||||
await session.execute(
|
||||
sql_delete(DatasetData).where(DatasetData.id == link.id)
|
||||
)
|
||||
data_entries_deleted += 1
|
||||
|
||||
# Delete the dataset itself
|
||||
await session.execute(
|
||||
sql_delete(Dataset).where(Dataset.id == dataset.id)
|
||||
)
|
||||
datasets_deleted += 1
|
||||
|
||||
# Commit all changes
|
||||
await session.commit()
|
||||
|
||||
logger.info(f"Deleted {datasets_deleted} datasets and {data_entries_deleted} data entries for user {user_id}")
|
||||
|
||||
return {
|
||||
"datasets_deleted": datasets_deleted,
|
||||
"data_entries_deleted": data_entries_deleted,
|
||||
}
|
||||
datasets_query = select(Dataset.id).where(Dataset.owner_id == user_id)
|
||||
user_datasets_ids = (await session.execute(datasets_query)).scalars().all()
|
||||
if user_datasets_ids:
|
||||
await db_engine.delete_entities_by_id(Dataset.__table__.name, user_datasets_ids)
|
||||
|
|
@ -5,4 +5,4 @@ from cognee.infrastructure.databases.relational import get_relational_engine
|
|||
async def delete_dataset(dataset: Dataset):
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
return await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
|
||||
return await db_engine.delete_entities_by_id(dataset.__tablename__, dataset.id)
|
||||
|
|
|
|||
30
cognee/modules/data/methods/delete_dataset_by_name.py
Normal file
30
cognee/modules/data/methods/delete_dataset_by_name.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import select
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from ..models import Dataset
|
||||
|
||||
|
||||
async def delete_dataset_by_name(
|
||||
dataset_name: str, user_id: UUID
|
||||
):
|
||||
"""
|
||||
Delete a single dataset by name for a specific user.
|
||||
|
||||
Args:
|
||||
dataset_name: The name of the dataset to delete (must be a single string).
|
||||
user_id: UUID of the dataset owner.
|
||||
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
dataset_id = (
|
||||
await session.scalars(
|
||||
select(Dataset.id)
|
||||
.filter(Dataset.owner_id == user_id)
|
||||
.filter(Dataset.name == dataset_name)
|
||||
)
|
||||
).first()
|
||||
#Keeping this out of the first session, since delete_entities_by_id creates another session.
|
||||
if dataset_id:
|
||||
await db_engine.delete_entities_by_id(Dataset.__table__.name, dataset_id)
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
from typing import Union
|
||||
from uuid import UUID
|
||||
from sqlalchemy import select
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from ..models import Dataset
|
||||
|
||||
|
||||
async def delete_datasets_by_name(
|
||||
dataset_names: Union[str, list[str]], user_id: UUID
|
||||
) -> dict[str, any]:
|
||||
"""
|
||||
Delete datasets by name for a specific user.
|
||||
|
||||
Args:
|
||||
dataset_names: Single dataset name or list of dataset names to delete
|
||||
user_id: UUID of the dataset owner
|
||||
|
||||
Returns:
|
||||
Dictionary containing:
|
||||
- deleted_count: Number of datasets deleted
|
||||
- deleted_ids: List of deleted dataset IDs
|
||||
- not_found: List of dataset names that were not found
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
# Normalize input to list
|
||||
if isinstance(dataset_names, str):
|
||||
dataset_names = [dataset_names]
|
||||
|
||||
# Retrieve datasets matching the names and user_id
|
||||
datasets = (
|
||||
await session.scalars(
|
||||
select(Dataset)
|
||||
.filter(Dataset.owner_id == user_id)
|
||||
.filter(Dataset.name.in_(dataset_names))
|
||||
)
|
||||
).all()
|
||||
|
||||
# Track results
|
||||
deleted_ids = []
|
||||
found_names = set()
|
||||
|
||||
# Delete each dataset
|
||||
for dataset in datasets:
|
||||
await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
|
||||
deleted_ids.append(dataset.id)
|
||||
found_names.add(dataset.name)
|
||||
|
||||
# Identify datasets that were not found
|
||||
not_found = [name for name in dataset_names if name not in found_names]
|
||||
|
||||
return {
|
||||
"deleted_count": len(deleted_ids),
|
||||
"deleted_ids": deleted_ids,
|
||||
"not_found": not_found
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue