From f6800b979edad66102ac449e00889b709a2a4e33 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 18 Dec 2024 15:26:13 +0100 Subject: [PATCH] feat: Add deletion of local files when deleting data Delete local files when deleting data from cognee Feature COG-475 --- .../sqlalchemy/SqlAlchemyAdapter.py | 28 +++++++++++++++++-- cognee/modules/data/methods/delete_data.py | 2 +- cognee/modules/data/methods/delete_dataset.py | 2 +- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 8041aeaea..e0db40ca3 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -3,11 +3,12 @@ from uuid import UUID from typing import Optional from typing import AsyncGenerator, List from contextlib import asynccontextmanager -from sqlalchemy import text, select, MetaData, Table +from sqlalchemy import text, select, MetaData, Table, delete from sqlalchemy.orm import joinedload from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker from cognee.infrastructure.databases.exceptions import EntityNotFoundError +from cognee.modules.data.models.Data import Data from ..ModelBase import Base class SQLAlchemyAdapter(): @@ -86,9 +87,9 @@ class SQLAlchemyAdapter(): return [schema[0] for schema in result.fetchall()] return [] - async def delete_data_by_id(self, table_name: str, data_id: UUID, schema_name: Optional[str] = "public"): + async def delete_entity_by_id(self, table_name: str, data_id: UUID, schema_name: Optional[str] = "public"): """ - Delete data in given table based on id. Table must have an id Column. + Delete entity in given table based on id. Table must have an id Column. 
""" if self.engine.dialect.name == "sqlite": async with self.get_async_session() as session: @@ -107,6 +108,27 @@ class SQLAlchemyAdapter(): await session.commit() + async def delete_data_entity(self, data_id: UUID): + """ + Delete the Data row with the given id. NOTE: the row is fetched into data_entity but not used yet, so no local files are removed here; file cleanup for unreferenced data is still TODO. + """ + if self.engine.dialect.name == "sqlite": + async with self.get_async_session() as session: + + # Foreign key constraints are disabled by default in SQLite (for backwards compatibility), + # so must be enabled for each database connection/session separately. + await session.execute(text("PRAGMA foreign_keys = ON;")) + + data_entity = await session.execute(select(Data).where(Data.id == data_id)) + + await session.execute(delete(Data).where(Data.id == data_id)) + await session.commit() + else: + async with self.get_async_session() as session: + await session.execute(delete(Data).where(Data.id == data_id)) + await session.commit() + + async def get_table(self, table_name: str, schema_name: Optional[str] = "public") -> Table: """ Dynamically loads a table using the given table name and schema name. 
diff --git a/cognee/modules/data/methods/delete_data.py b/cognee/modules/data/methods/delete_data.py index c0493a606..f0e9629ec 100644 --- a/cognee/modules/data/methods/delete_data.py +++ b/cognee/modules/data/methods/delete_data.py @@ -17,4 +17,4 @@ async def delete_data(data: Data): db_engine = get_relational_engine() - return await db_engine.delete_data_by_id(data.__tablename__, data.id) + return await db_engine.delete_entity_by_id(data.__tablename__, data.id) diff --git a/cognee/modules/data/methods/delete_dataset.py b/cognee/modules/data/methods/delete_dataset.py index c2205144d..96a2e7d71 100644 --- a/cognee/modules/data/methods/delete_dataset.py +++ b/cognee/modules/data/methods/delete_dataset.py @@ -4,4 +4,4 @@ from cognee.infrastructure.databases.relational import get_relational_engine async def delete_dataset(dataset: Dataset): db_engine = get_relational_engine() - return await db_engine.delete_data_by_id(dataset.__tablename__, dataset.id) + return await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)