diff --git a/alembic/versions/84e5d08260d6_replace_graph_ledger_table_with_nodes_.py b/alembic/versions/84e5d08260d6_replace_graph_ledger_table_with_nodes_.py index 919b5bfb4..14433e403 100644 --- a/alembic/versions/84e5d08260d6_replace_graph_ledger_table_with_nodes_.py +++ b/alembic/versions/84e5d08260d6_replace_graph_ledger_table_with_nodes_.py @@ -27,9 +27,6 @@ def upgrade() -> None: table_names = inspector.get_table_names() - if "graph_relationship_ledger" in table_names: - op.drop_table("graph_relationship_ledger") - if "nodes" not in table_names: op.create_table( "nodes", @@ -84,37 +81,3 @@ def downgrade() -> None: if "edges" in table_names: op.drop_table("edges") - - if "graph_relationship_ledger" not in table_names: - op.create_table( - "graph_relationship_ledger", - sa.Column( - "id", - sa.UUID, - primary_key=True, - default=lambda: uuid5(NAMESPACE_OID, f"{datetime.now(timezone.utc).timestamp()}"), - ), - sa.Column("source_node_id", sa.UUID, nullable=False), - sa.Column("destination_node_id", sa.UUID, nullable=False), - sa.Column("creator_function", sa.String(), nullable=False), - sa.Column("node_label", sa.String(), nullable=False), - sa.Column( - "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) - ), - sa.Column( - "deleted_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) - ), - sa.Column("user_id", sa.UUID, nullable=False), - ) - - op.create_index("idx_graph_relationship_id", "graph_relationship_ledger", ["id"]) - op.create_index( - "idx_graph_relationship_ledger_source_node_id", - "graph_relationship_ledger", - ["source_node_id"], - ) - op.create_index( - "idx_graph_relationship_ledger_destination_node_id", - "graph_relationship_ledger", - ["destination_node_id"], - ) diff --git a/cognee/tests/test_delete_default_graph.py b/cognee/tests/test_delete_default_graph.py index 1481c09d3..a35b8a640 100644 --- a/cognee/tests/test_delete_default_graph.py +++ b/cognee/tests/test_delete_default_graph.py @@ -33,14 +33,6 @@ async def main(mock_create_structured_output: AsyncMock): await cognee.prune.prune_system(metadata=True) await setup() - vector_engine = get_vector_engine() - - assert not await vector_engine.has_collection("EdgeType_relationship_name") - assert not await vector_engine.has_collection("Entity_name") - assert not await vector_engine.has_collection("DocumentChunk_text") - assert not await vector_engine.has_collection("TextSummary_text") - assert not await vector_engine.has_collection("TextDocument_text") - def mock_llm_output(text_input: str, system_prompt: str, response_model): if text_input == "test": # LLM connection test return "test" @@ -115,6 +107,14 @@ async def main(mock_create_structured_output: AsyncMock): mock_create_structured_output.side_effect = mock_llm_output + vector_engine = get_vector_engine() + + assert not await vector_engine.has_collection("EdgeType_relationship_name") + assert not await vector_engine.has_collection("Entity_name") + assert not await vector_engine.has_collection("DocumentChunk_text") + assert not await vector_engine.has_collection("TextSummary_text") + assert not await vector_engine.has_collection("TextDocument_text") + add_john_result = await cognee.add( "John works for Apple. He is also affiliated with a non-profit organization called 'Food for Hungry'" ) @@ -164,6 +164,8 @@ async def main(mock_create_structured_output: AsyncMock): after_delete_nodes_by_vector_collection[collection_name] = [] after_delete_nodes_by_vector_collection[collection_name].append(node) + vector_engine = get_vector_engine() + removed_node_ids = initial_node_ids - after_first_delete_node_ids for collection_name, initial_nodes in initial_nodes_by_vector_collection.items(): diff --git a/cognee/tests/test_delete_default_graph_non_mocked.py b/cognee/tests/test_delete_default_graph_non_mocked.py index b2d167a70..ce5087347 100644 --- a/cognee/tests/test_delete_default_graph_non_mocked.py +++ b/cognee/tests/test_delete_default_graph_non_mocked.py @@ -27,14 +27,6 @@ async def main(): await cognee.prune.prune_system(metadata=True) await setup() - vector_engine = get_vector_engine() - - assert not await vector_engine.has_collection("EdgeType_relationship_name") - assert not await vector_engine.has_collection("Entity_name") - assert not await vector_engine.has_collection("DocumentChunk_text") - assert not await vector_engine.has_collection("TextSummary_text") - assert not await vector_engine.has_collection("TextDocument_text") - add_result = await cognee.add( "John works for Apple. He is also affiliated with a non-profit organization called 'Food for Hungry'" ) @@ -45,6 +37,14 @@ async def main(): ) maries_data_id = add_result.data_ingestion_info[0]["data_id"] + vector_engine = get_vector_engine() + + assert not await vector_engine.has_collection("EdgeType_relationship_name") + assert not await vector_engine.has_collection("Entity_name") + assert not await vector_engine.has_collection("DocumentChunk_text") + assert not await vector_engine.has_collection("TextSummary_text") + assert not await vector_engine.has_collection("TextDocument_text") + cognify_result: dict = await cognee.cognify() dataset_id = list(cognify_result.keys())[0] diff --git a/cognee/tests/test_delete_default_graph_with_legacy_data_1.py b/cognee/tests/test_delete_default_graph_with_legacy_data_1.py index dc331199b..fa80773cb 100644 --- a/cognee/tests/test_delete_default_graph_with_legacy_data_1.py +++ b/cognee/tests/test_delete_default_graph_with_legacy_data_1.py @@ -147,6 +147,8 @@ def get_nodes_and_edges(): @pytest.mark.asyncio @patch.object(LLMGateway, "acreate_structured_output", new_callable=AsyncMock) async def main(mock_create_structured_output: AsyncMock): + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "False" + data_directory_path = os.path.join( pathlib.Path(__file__).parent, ".data_storage/test_delete_default_graph_with_legacy_graph_1" ) @@ -172,43 +174,7 @@ async def main(mock_create_structured_output: AsyncMock): user = await get_default_user() - graph_engine = await get_graph_engine() - old_nodes, old_edges = get_nodes_and_edges() - old_document = old_nodes[0] - - await graph_engine.add_nodes(old_nodes) - await graph_engine.add_edges(old_edges) - - await index_data_points(old_nodes) - await index_graph_edges(old_edges) - - await record_data_in_legacy_ledger(old_nodes, old_edges, user) - - db_engine = get_relational_engine() - - dataset = await create_authorized_dataset("main_dataset", user) - - async with db_engine.get_async_session() as session: - old_data = Data( - id=old_document.id, - name=old_document.name, - extension="txt", - raw_data_location=old_document.raw_data_location, - external_metadata=old_document.external_metadata, - mime_type=old_document.mime_type, - owner_id=user.id, - pipeline_status={ - "cognify_pipeline": { - str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED, - } - }, - ) - session.add(old_data) - - dataset.data.append(old_data) - session.add(dataset) - - await session.commit() + old_nodes, old_edges = await add_mocked_legacy_data(user) def mock_llm_output(text_input: str, system_prompt: str, response_model): if text_input == "test": # LLM connection test @@ -332,6 +298,8 @@ async def main(mock_create_structured_output: AsyncMock): after_delete_nodes_by_vector_collection[collection_name] = [] after_delete_nodes_by_vector_collection[collection_name].append(node) + vector_engine = get_vector_engine() + removed_node_ids = initial_node_ids - after_first_delete_node_ids for collection_name, initial_nodes in initial_nodes_by_vector_collection.items(): @@ -366,6 +334,48 @@ async def main(mock_create_structured_output: AsyncMock): assert len(vector_items) == len(query_edge_ids), "Vector items are not deleted." +async def add_mocked_legacy_data(user): + graph_engine = await get_graph_engine() + old_nodes, old_edges = get_nodes_and_edges() + old_document = old_nodes[0] + + await graph_engine.add_nodes(old_nodes) + await graph_engine.add_edges(old_edges) + + await index_data_points(old_nodes) + await index_graph_edges(old_edges) + + await record_data_in_legacy_ledger(old_nodes, old_edges, user) + + db_engine = get_relational_engine() + + dataset = await create_authorized_dataset("main_dataset", user) + + async with db_engine.get_async_session() as session: + old_data = Data( + id=old_document.id, + name=old_document.name, + extension="txt", + raw_data_location=old_document.raw_data_location, + external_metadata=old_document.external_metadata, + mime_type=old_document.mime_type, + owner_id=user.id, + pipeline_status={ + "cognify_pipeline": { + str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED, + } + }, + ) + session.add(old_data) + + dataset.data.append(old_data) + session.add(dataset) + + await session.commit() + + return old_nodes, old_edges + + if __name__ == "__main__": import asyncio diff --git a/cognee/tests/test_delete_default_graph_with_legacy_data_2.py b/cognee/tests/test_delete_default_graph_with_legacy_data_2.py index 082d250f5..51c962768 100644 --- a/cognee/tests/test_delete_default_graph_with_legacy_data_2.py +++ b/cognee/tests/test_delete_default_graph_with_legacy_data_2.py @@ -146,6 +146,8 @@ def get_nodes_and_edges(): @pytest.mark.asyncio @patch.object(LLMGateway, "acreate_structured_output", new_callable=AsyncMock) async def main(mock_create_structured_output: AsyncMock): + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "False" + data_directory_path = os.path.join( pathlib.Path(__file__).parent, ".data_storage/test_delete_default_graph_with_legacy_graph_2" ) @@ -171,43 +173,7 @@ async def main(mock_create_structured_output: AsyncMock): user = await get_default_user() - graph_engine = await get_graph_engine() - old_nodes, old_edges = get_nodes_and_edges() - old_document = old_nodes[0] - - await graph_engine.add_nodes(old_nodes) - await graph_engine.add_edges(old_edges) - - await index_data_points(old_nodes) - await index_graph_edges(old_edges) - - await record_data_in_legacy_ledger(old_nodes, old_edges, user) - - db_engine = get_relational_engine() - - dataset = await create_authorized_dataset("main_dataset", user) - - async with db_engine.get_async_session() as session: - old_data = Data( - id=old_document.id, - name=old_document.name, - extension="txt", - raw_data_location=old_document.raw_data_location, - external_metadata=old_document.external_metadata, - mime_type=old_document.mime_type, - owner_id=user.id, - pipeline_status={ - "cognify_pipeline": { - str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED, - } - }, - ) - session.add(old_data) - - dataset.data.append(old_data) - session.add(dataset) - - await session.commit() + old_document, old_nodes, old_edges = await add_mocked_legacy_data(user) def mock_llm_output(text_input: str, system_prompt: str, response_model): if text_input == "test": # LLM connection test @@ -328,6 +294,8 @@ async def main(mock_create_structured_output: AsyncMock): after_delete_nodes_by_vector_collection[collection_name] = [] after_delete_nodes_by_vector_collection[collection_name].append(node) + vector_engine = get_vector_engine() + removed_node_ids = initial_node_ids - after_first_delete_node_ids for collection_name, initial_nodes in initial_nodes_by_vector_collection.items(): @@ -363,6 +331,48 @@ async def main(mock_create_structured_output: AsyncMock): assert len(vector_items) == len(query_edge_ids), "Vector items are not deleted." +async def add_mocked_legacy_data(user): + graph_engine = await get_graph_engine() + old_nodes, old_edges = get_nodes_and_edges() + old_document = old_nodes[0] + + await graph_engine.add_nodes(old_nodes) + await graph_engine.add_edges(old_edges) + + await index_data_points(old_nodes) + await index_graph_edges(old_edges) + + await record_data_in_legacy_ledger(old_nodes, old_edges, user) + + db_engine = get_relational_engine() + + dataset = await create_authorized_dataset("main_dataset", user) + + async with db_engine.get_async_session() as session: + old_data = Data( + id=old_document.id, + name=old_document.name, + extension="txt", + raw_data_location=old_document.raw_data_location, + external_metadata=old_document.external_metadata, + mime_type=old_document.mime_type, + owner_id=user.id, + pipeline_status={ + "cognify_pipeline": { + str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED, + } + }, + ) + session.add(old_data) + + dataset.data.append(old_data) + session.add(dataset) + + await session.commit() + + return old_document, old_nodes, old_edges + + if __name__ == "__main__": import asyncio