diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index cb69e9ef6..8cd62910c 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -237,6 +237,31 @@ jobs:
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: uv run python ./cognee/tests/test_dataset_database_handler.py
 
+  test-dataset-database-deletion:
+    name: Test dataset database deletion in Cognee
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Run dataset databases deletion test
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+        run: uv run python ./cognee/tests/test_dataset_delete.py
+
   test-permissions:
     name: Test permissions with different situations in Cognee
     runs-on: ubuntu-22.04
diff --git a/cognee/tests/test_dataset_delete.py b/cognee/tests/test_dataset_delete.py
new file mode 100644
index 000000000..3554267cd
--- /dev/null
+++ b/cognee/tests/test_dataset_delete.py
@@ -0,0 +1,80 @@
+import os
+import asyncio
+import pathlib
+from uuid import UUID
+
+import cognee
+from cognee.shared.logging_utils import setup_logging, ERROR
+from cognee.modules.data.methods.delete_dataset import delete_dataset
+from cognee.modules.data.methods.get_dataset import get_dataset
+from cognee.modules.users.methods import get_default_user
+
+
+async def main():
+    """Check that deleting a dataset removes its graph and vector database files.
+
+    Adds two datasets, cognifies them, then deletes each one and checks the filesystem.
+    """
+    # Set data and system directory paths
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_dataset_delete")
+        ).resolve()
+    )
+    cognee.config.data_root_directory(data_directory_path)
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_dataset_delete")
+        ).resolve()
+    )
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    # Create a clean slate for cognee -- reset data and system state
+    print("Resetting cognee data...")
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    print("Data reset complete.\n")
+
+    # cognee knowledge graph will be created based on this text
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """
+
+    # Add the text, and make it available for cognify
+    await cognee.add(text, "nlp_dataset")
+    await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")
+
+    # Use LLMs and cognee to create knowledge graph
+    ret_val = await cognee.cognify()
+    user = await get_default_user()
+
+    for val in ret_val:
+        dataset_id = str(val)
+        vector_db_path = os.path.join(
+            cognee_directory_path, "databases", str(user.id), dataset_id + ".lance.db"
+        )
+        graph_db_path = os.path.join(
+            cognee_directory_path, "databases", str(user.id), dataset_id + ".pkl"
+        )
+
+        # Check if databases are properly created and exist before deletion
+        assert os.path.exists(graph_db_path), "Graph database file not found."
+        assert os.path.exists(vector_db_path), "Vector database file not found."
+
+        dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
+        await delete_dataset(dataset)
+
+        # Confirm the exact files checked above are gone (not the suffix-less dataset path)
+        assert not os.path.exists(graph_db_path), "Graph database file was not deleted."
+        assert not os.path.exists(vector_db_path), "Vector database file was not deleted."
+
+
+if __name__ == "__main__":
+    logger = setup_logging(log_level=ERROR)
+    event_loop = asyncio.new_event_loop()  # dedicated loop for this script run
+    asyncio.set_event_loop(event_loop)
+    try:
+        event_loop.run_until_complete(main())
+    finally:  # always finalize async generators, even if main() raised
+        event_loop.run_until_complete(event_loop.shutdown_asyncgens())