feat: Add deletion of local files made by cognee through data endpoint

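Delete local files created by cognee when the corresponding data is deleted from the database through the data endpoint. A file is removed only if no other data record still references it.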

Feature COG-475
This commit is contained in:
Igor Ilic 2024-12-19 16:35:35 +01:00
parent d9368c6398
commit c139d52938
2 changed files with 20 additions and 42 deletions

View file

@ -1,3 +1,4 @@
import os
from os import path
from uuid import UUID
from typing import Optional
@ -114,54 +115,31 @@ class SQLAlchemyAdapter():
"""
Delete data and local files related to data if there are no references to it anymore.
"""
if self.engine.dialect.name == "sqlite":
async with self.get_async_session() as session:
async with self.get_async_session() as session:
if self.engine.dialect.name == "sqlite":
# Foreign key constraints are disabled by default in SQLite (for backwards compatibility),
# so must be enabled for each database connection/session separately.
await session.execute(text("PRAGMA foreign_keys = ON;"))
data_entity = await session.scalars(select(Data).where(Data.id == data_id)).one()
try:
data_entity = (await session.scalars(select(Data).where(Data.id == data_id))).one()
except (ValueError, NoResultFound) as e:
raise EntityNotFoundError(message=f"Entity not found: {str(e)}")
# Check if other data objects point to the same raw data location
raw_data_location_entities= await session.execute(
select(Data).where(Data.raw_data_location == data_entity.raw_data_location)).all()
# Check if other data objects point to the same raw data location
raw_data_location_entities = (await session.execute(
select(Data).where(Data.raw_data_location == data_entity.raw_data_location))).all()
# Don't delete local file unless this is the only reference to the file in the database
if len(raw_data_location_entities) == 1:
# delete local file
from cognee.base_config import get_base_config
config = get_base_config()
if config.data_root_directory in raw_data_location_entities[0]:
# delete local file
pass
await session.execute(delete(Data).where(Data.id == data_id))
await session.commit()
else:
async with self.get_async_session() as session:
try:
data_entity = (await session.scalars(select(Data).where(Data.id == data_id))).one()
except (ValueError, NoResultFound) as e:
raise EntityNotFoundError(message=f"Entity not found: {str(e)}")
# Check if other data objects point to the same raw data location
raw_data_location_entities = (await session.execute(
select(Data).where(Data.raw_data_location == data_entity.raw_data_location))).all()
# Don't delete local file unless this is the only reference to the file in the database
if len(raw_data_location_entities) == 1:
# delete local file
from cognee.base_config import get_base_config
config = get_base_config()
if config.data_root_directory in raw_data_location_entities[0].raw_data_location:
# delete local file
pass
await session.execute(delete(Data).where(Data.id == data_id))
await session.commit()
# Don't delete local file unless this is the only reference to the file in the database
if len(raw_data_location_entities) == 1:
# delete local file only if it's created by cognee
from cognee.base_config import get_base_config
config = get_base_config()
if config.data_root_directory in raw_data_location_entities[0].raw_data_location:
os.remove(raw_data_location_entities[0])
await session.execute(delete(Data).where(Data.id == data_id))
await session.commit()
async def get_table(self, table_name: str, schema_name: Optional[str] = "public") -> Table:
"""

View file

@ -17,4 +17,4 @@ async def delete_data(data: Data):
db_engine = get_relational_engine()
return await db_engine.delete_entity_by_id(data.__tablename__, data.id)
return await db_engine.delete_data_entity(data.id)