feat: Delete local files created by cognee when data is deleted through the data endpoint

Delete local files created by cognee when the corresponding data is deleted from the database through the data endpoint.

Feature COG-475
This commit is contained in:
Igor Ilic 2024-12-19 16:35:35 +01:00
parent d9368c6398
commit c139d52938
2 changed files with 20 additions and 42 deletions

View file

@ -1,3 +1,4 @@
import os
from os import path from os import path
from uuid import UUID from uuid import UUID
from typing import Optional from typing import Optional
@ -114,54 +115,31 @@ class SQLAlchemyAdapter():
""" """
Delete data and local files related to data if there are no references to it anymore. Delete data and local files related to data if there are no references to it anymore.
""" """
if self.engine.dialect.name == "sqlite": async with self.get_async_session() as session:
async with self.get_async_session() as session: if self.engine.dialect.name == "sqlite":
# Foreign key constraints are disabled by default in SQLite (for backwards compatibility), # Foreign key constraints are disabled by default in SQLite (for backwards compatibility),
# so must be enabled for each database connection/session separately. # so must be enabled for each database connection/session separately.
await session.execute(text("PRAGMA foreign_keys = ON;")) await session.execute(text("PRAGMA foreign_keys = ON;"))
data_entity = await session.scalars(select(Data).where(Data.id == data_id)).one() try:
data_entity = (await session.scalars(select(Data).where(Data.id == data_id))).one()
except (ValueError, NoResultFound) as e:
raise EntityNotFoundError(message=f"Entity not found: {str(e)}")
# Check if other data objects point to the same raw data location # Check if other data objects point to the same raw data location
raw_data_location_entities= await session.execute( raw_data_location_entities = (await session.execute(
select(Data).where(Data.raw_data_location == data_entity.raw_data_location)).all() select(Data).where(Data.raw_data_location == data_entity.raw_data_location))).all()
# Don't delete local file unless this is the only reference to the file in the database # Don't delete local file unless this is the only reference to the file in the database
if len(raw_data_location_entities) == 1: if len(raw_data_location_entities) == 1:
# delete local file # delete local file only if it's created by cognee
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
config = get_base_config() config = get_base_config()
if config.data_root_directory in raw_data_location_entities[0]: if config.data_root_directory in raw_data_location_entities[0].raw_data_location:
# delete local file os.remove(raw_data_location_entities[0])
pass
await session.execute(delete(Data).where(Data.id == data_id))
await session.commit()
else:
async with self.get_async_session() as session:
try:
data_entity = (await session.scalars(select(Data).where(Data.id == data_id))).one()
except (ValueError, NoResultFound) as e:
raise EntityNotFoundError(message=f"Entity not found: {str(e)}")
# Check if other data objects point to the same raw data location
raw_data_location_entities = (await session.execute(
select(Data).where(Data.raw_data_location == data_entity.raw_data_location))).all()
# Don't delete local file unless this is the only reference to the file in the database
if len(raw_data_location_entities) == 1:
# delete local file
from cognee.base_config import get_base_config
config = get_base_config()
if config.data_root_directory in raw_data_location_entities[0].raw_data_location:
# delete local file
pass
await session.execute(delete(Data).where(Data.id == data_id))
await session.commit()
await session.execute(delete(Data).where(Data.id == data_id))
await session.commit()
async def get_table(self, table_name: str, schema_name: Optional[str] = "public") -> Table: async def get_table(self, table_name: str, schema_name: Optional[str] = "public") -> Table:
""" """

View file

@ -17,4 +17,4 @@ async def delete_data(data: Data):
db_engine = get_relational_engine() db_engine = get_relational_engine()
return await db_engine.delete_entity_by_id(data.__tablename__, data.id) return await db_engine.delete_data_entity(data.id)