test: Add test to verify deletion of local files
Added test that checks local files created by cognee will be deleted and those not created by cognee won't Test COG-475
This commit is contained in:
parent
418cb65b35
commit
1406b6f078
2 changed files with 45 additions and 12 deletions
|
|
@ -128,7 +128,7 @@ class SQLAlchemyAdapter():
|
|||
|
||||
# Check if other data objects point to the same raw data location
|
||||
raw_data_location_entities = (await session.execute(
|
||||
select(Data).where(Data.raw_data_location == data_entity.raw_data_location))).all()
|
||||
select(Data.raw_data_location).where(Data.raw_data_location == data_entity.raw_data_location))).all()
|
||||
|
||||
# Don't delete local file unless this is the only reference to the file in the database
|
||||
if len(raw_data_location_entities) == 1:
|
||||
|
|
@ -136,7 +136,7 @@ class SQLAlchemyAdapter():
|
|||
from cognee.base_config import get_base_config
|
||||
config = get_base_config()
|
||||
if config.data_root_directory in raw_data_location_entities[0].raw_data_location:
|
||||
os.remove(raw_data_location_entities[0])
|
||||
os.remove(raw_data_location_entities[0].raw_data_location)
|
||||
|
||||
await session.execute(delete(Data).where(Data.id == data_id))
|
||||
await session.commit()
|
||||
|
|
|
|||
|
|
@ -2,12 +2,52 @@ import os
|
|||
import logging
|
||||
import pathlib
|
||||
import cognee
|
||||
|
||||
from cognee.modules.data.models import Data
|
||||
from cognee.api.v1.search import SearchType
|
||||
from cognee.modules.retrieval.brute_force_triplet_search import brute_force_triplet_search
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
async def test_local_file_deletion(data_text, file_location):
|
||||
from sqlalchemy import select
|
||||
import hashlib
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
|
||||
engine = get_relational_engine()
|
||||
|
||||
async with engine.get_async_session() as session:
|
||||
# Get hash of data contents
|
||||
encoded_text = data_text.encode("utf-8")
|
||||
data_hash = hashlib.md5(encoded_text).hexdigest()
|
||||
# Get data entry from database based on hash contents
|
||||
data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
|
||||
assert os.path.isfile(data.raw_data_location), f"Data location doesn't exist: {data.raw_data_location}"
|
||||
# Test deletion of data along with local files created by cognee
|
||||
await engine.delete_data_entity(data.id)
|
||||
assert not os.path.exists(data.raw_data_location), f"Data location exists: {data.raw_data_location}"
|
||||
|
||||
async with engine.get_async_session() as session:
|
||||
# Get data entry from database based on file path
|
||||
data = (await session.scalars(select(Data).where(Data.raw_data_location == file_location))).one()
|
||||
assert os.path.isfile(data.raw_data_location), f"Data location doesn't exist: {data.raw_data_location}"
|
||||
# Test local files not created by cognee won't get deleted
|
||||
await engine.delete_data_entity(data.id)
|
||||
assert os.path.exists(data.raw_data_location), f"Data location doesn't exists: {data.raw_data_location}"
|
||||
|
||||
async def test_getting_of_documents(dataset_name_1):
|
||||
# Test getting of documents for search per dataset
|
||||
from cognee.modules.users.permissions.methods import get_document_ids_for_user
|
||||
user = await get_default_user()
|
||||
document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
|
||||
assert len(document_ids) == 1, f"Number of expected documents doesn't match {len(document_ids)} != 1"
|
||||
|
||||
# Test getting of documents for search when no dataset is provided
|
||||
user = await get_default_user()
|
||||
document_ids = await get_document_ids_for_user(user.id)
|
||||
assert len(document_ids) == 2, f"Number of expected documents doesn't match {len(document_ids)} != 2"
|
||||
|
||||
|
||||
async def main():
|
||||
cognee.config.set_vector_db_config(
|
||||
|
|
@ -67,16 +107,7 @@ async def main():
|
|||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
# Test getting of documents for search per dataset
|
||||
from cognee.modules.users.permissions.methods import get_document_ids_for_user
|
||||
user = await get_default_user()
|
||||
document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
|
||||
assert len(document_ids) == 1, f"Number of expected documents doesn't match {len(document_ids)} != 1"
|
||||
|
||||
# Test getting of documents for search when no dataset is provided
|
||||
user = await get_default_user()
|
||||
document_ids = await get_document_ids_for_user(user.id)
|
||||
assert len(document_ids) == 2, f"Number of expected documents doesn't match {len(document_ids)} != 2"
|
||||
await test_getting_of_documents(dataset_name_1)
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
|
||||
|
|
@ -106,6 +137,8 @@ async def main():
|
|||
results = await brute_force_triplet_search('What is a quantum computer?')
|
||||
assert len(results) > 0
|
||||
|
||||
await test_local_file_deletion(text, explanation_file_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
assert not os.path.isdir(data_directory_path), "Local data files are not deleted"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue