316 lines
13 KiB
Python
316 lines
13 KiB
Python
import os
|
|
import pathlib
|
|
import cognee
|
|
from uuid import uuid4
|
|
from cognee.modules.users.exceptions import PermissionDeniedError
|
|
from cognee.shared.logging_utils import get_logger
|
|
from cognee.modules.users.methods import get_default_user, create_user
|
|
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
|
|
from cognee.modules.data.methods import get_dataset_data, get_datasets_by_name
|
|
from cognee.api.v1.exceptions import DocumentNotFoundError, DatasetNotFoundError
|
|
|
|
logger = get_logger()
|
|
|
|
|
|
async def main():
|
|
# Enable permissions feature
|
|
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"
|
|
|
|
# Clean up test directories before starting
|
|
data_directory_path = str(
|
|
pathlib.Path(
|
|
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_delete_by_id")
|
|
).resolve()
|
|
)
|
|
cognee_directory_path = str(
|
|
pathlib.Path(
|
|
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_delete_by_id")
|
|
).resolve()
|
|
)
|
|
|
|
cognee.config.data_root_directory(data_directory_path)
|
|
cognee.config.system_root_directory(cognee_directory_path)
|
|
|
|
await cognee.prune.prune_data()
|
|
await cognee.prune.prune_system(metadata=True)
|
|
|
|
# Setup database and tables
|
|
from cognee.modules.engine.operations.setup import setup
|
|
|
|
await setup()
|
|
|
|
print("🧪 Testing Delete by ID and Dataset Data Endpoints")
|
|
print("=" * 60)
|
|
|
|
# Get the default user first
|
|
default_user = await get_default_user()
|
|
|
|
# Test data
|
|
text_1 = """
|
|
Apple Inc. is an American multinational technology company that specializes in consumer electronics,
|
|
software, and online services. Apple is the world's largest technology company by revenue and,
|
|
since January 2021, the world's most valuable company.
|
|
"""
|
|
|
|
text_2 = """
|
|
Microsoft Corporation is an American multinational technology corporation which produces computer software,
|
|
consumer electronics, personal computers, and related services. Its best known software products are the
|
|
Microsoft Windows line of operating systems and the Microsoft Office suite.
|
|
"""
|
|
|
|
text_3 = """
|
|
Google LLC is an American multinational technology company that specializes in Internet-related services and products,
|
|
which include online advertising technologies, search engine, cloud computing, software, and hardware. Google has been
|
|
referred to as the most powerful company in the world and one of the world's most valuable brands.
|
|
"""
|
|
|
|
# Test 1: Setup data and datasets
|
|
print("\n📝 Test 1: Setting up test data and datasets")
|
|
|
|
# Add data for default user
|
|
await cognee.add([text_1], dataset_name="tech_companies_1", user=default_user)
|
|
|
|
# Create test user first for the second dataset
|
|
test_user = await create_user("test_user_delete@gmail.com", "test@example.com")
|
|
|
|
# Add data for test user
|
|
await cognee.add([text_2], dataset_name="tech_companies_2", user=test_user)
|
|
|
|
# Create third user for isolation testing
|
|
isolation_user = await create_user("isolation_user@gmail.com", "isolation@example.com")
|
|
|
|
# Add data for isolation user (should remain unaffected by other deletions)
|
|
await cognee.add([text_3], dataset_name="tech_companies_3", user=isolation_user)
|
|
|
|
tst = await cognee.cognify(["tech_companies_1"], user=default_user)
|
|
tst2 = await cognee.cognify(["tech_companies_2"], user=test_user)
|
|
tst3 = await cognee.cognify(["tech_companies_3"], user=isolation_user)
|
|
print("tst", tst)
|
|
print("tst2", tst2)
|
|
print("tst3", tst3)
|
|
|
|
# Extract dataset_ids from cognify results
|
|
def extract_dataset_id_from_cognify(cognify_result):
|
|
"""Extract dataset_id from cognify output dictionary"""
|
|
for dataset_id, pipeline_result in cognify_result.items():
|
|
return dataset_id # Return the first (and likely only) dataset_id
|
|
return None
|
|
|
|
# Get dataset IDs from cognify results
|
|
dataset_id_1 = extract_dataset_id_from_cognify(tst)
|
|
dataset_id_2 = extract_dataset_id_from_cognify(tst2)
|
|
dataset_id_3 = extract_dataset_id_from_cognify(tst3)
|
|
|
|
print(f"📋 Extracted dataset_id from tst: {dataset_id_1}")
|
|
print(f"📋 Extracted dataset_id from tst2: {dataset_id_2}")
|
|
print(f"📋 Extracted dataset_id from tst3: {dataset_id_3}")
|
|
|
|
# Get dataset data for deletion testing
|
|
dataset_data_1 = await get_dataset_data(dataset_id_1)
|
|
dataset_data_2 = await get_dataset_data(dataset_id_2)
|
|
dataset_data_3 = await get_dataset_data(dataset_id_3)
|
|
|
|
print(f"📊 Dataset 1 contains {len(dataset_data_1)} data items")
|
|
print(f"📊 Dataset 2 contains {len(dataset_data_2)} data items")
|
|
print(f"📊 Dataset 3 (isolation) contains {len(dataset_data_3)} data items")
|
|
|
|
# Test 2: Get data to delete from the extracted datasets
|
|
print("\n📝 Test 2: Preparing data for deletion from cognify results")
|
|
|
|
# Use the first data item from each dataset for testing
|
|
data_to_delete_id = dataset_data_1[0].id if dataset_data_1 else None
|
|
data_to_delete_from_test_user = dataset_data_2[0].id if dataset_data_2 else None
|
|
|
|
# Create datasets objects for testing
|
|
from cognee.modules.data.models import Dataset
|
|
|
|
default_dataset = Dataset(id=dataset_id_1, name="tech_companies_1", owner_id=default_user.id)
|
|
|
|
# Create dataset object for permission testing (test_user already created above)
|
|
test_dataset = Dataset(id=dataset_id_2, name="tech_companies_2", owner_id=test_user.id)
|
|
|
|
print(f"🔍 Data to delete ID: {data_to_delete_id}")
|
|
print(f"🔍 Test user data ID: {data_to_delete_from_test_user}")
|
|
|
|
print("\n📝 Test 3: Testing delete endpoint with proper permissions")
|
|
|
|
try:
|
|
result = await cognee.delete(data_id=data_to_delete_id, dataset_id=default_dataset.id)
|
|
print("✅ Delete successful for data owner")
|
|
assert result["status"] == "success", "Delete should succeed for data owner"
|
|
except Exception as e:
|
|
print(f"❌ Unexpected error in delete test: {e}")
|
|
raise
|
|
|
|
# Test 4: Test delete without permissions (should fail)
|
|
print("\n📝 Test 4: Testing delete endpoint without permissions")
|
|
|
|
delete_permission_error = False
|
|
try:
|
|
await cognee.delete(
|
|
data_id=data_to_delete_from_test_user,
|
|
dataset_id=test_dataset.id,
|
|
user=default_user, # Wrong user - should fail
|
|
)
|
|
except (PermissionDeniedError, DatasetNotFoundError):
|
|
delete_permission_error = True
|
|
print("✅ Delete correctly denied for user without permission")
|
|
except Exception as e:
|
|
print(f"❌ Unexpected error type: {e}")
|
|
|
|
assert delete_permission_error, "Delete should fail for user without permission"
|
|
|
|
# Test 5: Test delete with non-existent data_id
|
|
print("\n📝 Test 5: Testing delete endpoint with non-existent data_id")
|
|
|
|
non_existent_data_id = uuid4()
|
|
data_not_found_error = False
|
|
try:
|
|
await cognee.delete(
|
|
data_id=non_existent_data_id, dataset_id=default_dataset.id, user=default_user
|
|
)
|
|
except DocumentNotFoundError:
|
|
data_not_found_error = True
|
|
print("✅ Delete correctly failed for non-existent data_id")
|
|
except Exception as e:
|
|
print(f"❌ Unexpected error type: {e}")
|
|
|
|
assert data_not_found_error, "Delete should fail for non-existent data_id"
|
|
|
|
# Test 6: Test delete with non-existent dataset_id
|
|
print("\n📝 Test 6: Testing delete endpoint with non-existent dataset_id")
|
|
|
|
non_existent_dataset_id = uuid4()
|
|
dataset_not_found_error = False
|
|
try:
|
|
await cognee.delete(
|
|
data_id=data_to_delete_from_test_user,
|
|
dataset_id=non_existent_dataset_id,
|
|
user=test_user,
|
|
)
|
|
except (DatasetNotFoundError, PermissionDeniedError):
|
|
dataset_not_found_error = True
|
|
print("✅ Delete correctly failed for non-existent dataset_id")
|
|
except Exception as e:
|
|
print(f"❌ Unexpected error type: {e}")
|
|
|
|
assert dataset_not_found_error, "Delete should fail for non-existent dataset_id"
|
|
|
|
# Test 7: Test delete with data that doesn't belong to the dataset
|
|
print("\n📝 Test 7: Testing delete endpoint with data not in specified dataset")
|
|
|
|
# Add more data to create a scenario where data exists but not in the specified dataset
|
|
await cognee.add([text_1], dataset_name="another_dataset", user=default_user)
|
|
await cognee.cognify(["another_dataset"], user=default_user)
|
|
|
|
another_datasets = await get_datasets_by_name(["another_dataset"], default_user.id)
|
|
another_dataset = another_datasets[0]
|
|
|
|
data_not_in_dataset_error = False
|
|
try:
|
|
# Try to delete data from test_user's dataset using default_user's data_id
|
|
await cognee.delete(
|
|
data_id=data_to_delete_from_test_user, # This data belongs to test_user's dataset
|
|
dataset_id=another_dataset.id, # But we're specifying default_user's other dataset
|
|
user=default_user,
|
|
)
|
|
except DocumentNotFoundError:
|
|
data_not_in_dataset_error = True
|
|
print("✅ Delete correctly failed for data not in specified dataset")
|
|
except Exception as e:
|
|
print(f"❌ Unexpected error type: {e}")
|
|
|
|
assert data_not_in_dataset_error, "Delete should fail when data doesn't belong to dataset"
|
|
|
|
# Test 8: Test permission granting and delete
|
|
print("\n📝 Test 8: Testing delete after granting permissions")
|
|
|
|
# Give default_user delete permission on test_user's dataset
|
|
await authorized_give_permission_on_datasets(
|
|
default_user.id,
|
|
[test_dataset.id],
|
|
"delete",
|
|
test_user.id,
|
|
)
|
|
|
|
try:
|
|
result = await cognee.delete(
|
|
data_id=data_to_delete_from_test_user,
|
|
dataset_id=test_dataset.id,
|
|
user=default_user, # Now should work with granted permission
|
|
)
|
|
print("✅ Delete successful after granting permission", result)
|
|
assert result["status"] == "success", "Delete should succeed after granting permission"
|
|
except Exception as e:
|
|
print(f"❌ Unexpected error after granting permission: {e}")
|
|
raise
|
|
|
|
# Test 9: Verify graph database cleanup
|
|
print("\n📝 Test 9: Verifying comprehensive deletion (graph, vector, relational)")
|
|
|
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
|
|
graph_engine = await get_graph_engine()
|
|
nodes, edges = await graph_engine.get_graph_data()
|
|
|
|
# We should still have some nodes/edges from the remaining data, but fewer than before
|
|
print(f"✅ Graph database state after deletions - Nodes: {len(nodes)}, Edges: {len(edges)}")
|
|
|
|
# Test 10: Verify isolation user's data remains untouched
|
|
print("\n📝 Test 10: Verifying isolation user's data remains intact")
|
|
|
|
try:
|
|
# Get isolation user's data after all deletions
|
|
isolation_dataset_data_after = await get_dataset_data(dataset_id_3)
|
|
|
|
print(
|
|
f"📊 Isolation user's dataset still contains {len(isolation_dataset_data_after)} data items"
|
|
)
|
|
|
|
# Verify data count is unchanged
|
|
assert len(isolation_dataset_data_after) == len(dataset_data_3), (
|
|
f"Isolation user's data count changed! Expected {len(dataset_data_3)}, got {len(isolation_dataset_data_after)}"
|
|
)
|
|
|
|
# Verify specific data items are still there
|
|
original_data_ids = {str(data.id) for data in dataset_data_3}
|
|
remaining_data_ids = {str(data.id) for data in isolation_dataset_data_after}
|
|
|
|
assert original_data_ids == remaining_data_ids, "Isolation user's data IDs have changed!"
|
|
|
|
# Try to search isolation user's data to ensure it's still accessible
|
|
isolation_search_results = await cognee.search(
|
|
"Google technology company", user=isolation_user
|
|
)
|
|
assert len(isolation_search_results) > 0, "Isolation user's data should still be searchable"
|
|
|
|
print("✅ Isolation user's data completely unaffected by other users' deletions")
|
|
print(f" - Data count unchanged: {len(isolation_dataset_data_after)} items")
|
|
print(" - All original data IDs preserved")
|
|
print(f" - Data still searchable: {len(isolation_search_results)} results")
|
|
|
|
except Exception as e:
|
|
print(f"❌ Error verifying isolation user's data: {e}")
|
|
raise
|
|
|
|
print("\n" + "=" * 60)
|
|
print("🎉 All tests passed! Delete by ID endpoint working correctly.")
|
|
print("=" * 60)
|
|
|
|
print("""
|
|
📋 SUMMARY OF TESTED FUNCTIONALITY:
|
|
✅ Delete endpoint accepts data_id and dataset_id parameters
|
|
✅ Permission checking works for delete operations
|
|
✅ Proper error handling for non-existent data/datasets
|
|
✅ Data ownership validation (data must belong to specified dataset)
|
|
✅ Permission granting and revocation works correctly
|
|
✅ Comprehensive deletion across all databases (graph, vector, relational)
|
|
✅ Dataset data endpoint now checks read permissions properly
|
|
✅ Data isolation: Other users' data remains completely unaffected by deletions
|
|
""")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
import asyncio
|
|
|
|
asyncio.run(main())
|