fix: legacy data deletion tests don't require auth

This commit is contained in:
Boris Arzentar 2025-11-10 11:04:48 +01:00
parent 183852b8b5
commit 4198ee0d8b
No known key found for this signature in database
GPG key ID: D5CC274C784807B7
5 changed files with 112 additions and 127 deletions

View file

@@ -27,9 +27,6 @@ def upgrade() -> None:
table_names = inspector.get_table_names()
if "graph_relationship_ledger" in table_names:
op.drop_table("graph_relationship_ledger")
if "nodes" not in table_names:
op.create_table(
"nodes",
@@ -84,37 +81,3 @@ def downgrade() -> None:
if "edges" in table_names:
op.drop_table("edges")
if "graph_relationship_ledger" not in table_names:
op.create_table(
"graph_relationship_ledger",
sa.Column(
"id",
sa.UUID,
primary_key=True,
default=lambda: uuid5(NAMESPACE_OID, f"{datetime.now(timezone.utc).timestamp()}"),
),
sa.Column("source_node_id", sa.UUID, nullable=False),
sa.Column("destination_node_id", sa.UUID, nullable=False),
sa.Column("creator_function", sa.String(), nullable=False),
sa.Column("node_label", sa.String(), nullable=False),
sa.Column(
"created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
),
sa.Column(
"deleted_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
),
sa.Column("user_id", sa.UUID, nullable=False),
)
op.create_index("idx_graph_relationship_id", "graph_relationship_ledger", ["id"])
op.create_index(
"idx_graph_relationship_ledger_source_node_id",
"graph_relationship_ledger",
["source_node_id"],
)
op.create_index(
"idx_graph_relationship_ledger_destination_node_id",
"graph_relationship_ledger",
["destination_node_id"],
)

View file

@@ -33,14 +33,6 @@ async def main(mock_create_structured_output: AsyncMock):
await cognee.prune.prune_system(metadata=True)
await setup()
vector_engine = get_vector_engine()
assert not await vector_engine.has_collection("EdgeType_relationship_name")
assert not await vector_engine.has_collection("Entity_name")
assert not await vector_engine.has_collection("DocumentChunk_text")
assert not await vector_engine.has_collection("TextSummary_text")
assert not await vector_engine.has_collection("TextDocument_text")
def mock_llm_output(text_input: str, system_prompt: str, response_model):
if text_input == "test": # LLM connection test
return "test"
@@ -115,6 +107,14 @@ async def main(mock_create_structured_output: AsyncMock):
mock_create_structured_output.side_effect = mock_llm_output
vector_engine = get_vector_engine()
assert not await vector_engine.has_collection("EdgeType_relationship_name")
assert not await vector_engine.has_collection("Entity_name")
assert not await vector_engine.has_collection("DocumentChunk_text")
assert not await vector_engine.has_collection("TextSummary_text")
assert not await vector_engine.has_collection("TextDocument_text")
add_john_result = await cognee.add(
"John works for Apple. He is also affiliated with a non-profit organization called 'Food for Hungry'"
)
@@ -164,6 +164,8 @@ async def main(mock_create_structured_output: AsyncMock):
after_delete_nodes_by_vector_collection[collection_name] = []
after_delete_nodes_by_vector_collection[collection_name].append(node)
vector_engine = get_vector_engine()
removed_node_ids = initial_node_ids - after_first_delete_node_ids
for collection_name, initial_nodes in initial_nodes_by_vector_collection.items():

View file

@@ -27,14 +27,6 @@ async def main():
await cognee.prune.prune_system(metadata=True)
await setup()
vector_engine = get_vector_engine()
assert not await vector_engine.has_collection("EdgeType_relationship_name")
assert not await vector_engine.has_collection("Entity_name")
assert not await vector_engine.has_collection("DocumentChunk_text")
assert not await vector_engine.has_collection("TextSummary_text")
assert not await vector_engine.has_collection("TextDocument_text")
add_result = await cognee.add(
"John works for Apple. He is also affiliated with a non-profit organization called 'Food for Hungry'"
)
@@ -45,6 +37,14 @@ async def main():
)
maries_data_id = add_result.data_ingestion_info[0]["data_id"]
vector_engine = get_vector_engine()
assert not await vector_engine.has_collection("EdgeType_relationship_name")
assert not await vector_engine.has_collection("Entity_name")
assert not await vector_engine.has_collection("DocumentChunk_text")
assert not await vector_engine.has_collection("TextSummary_text")
assert not await vector_engine.has_collection("TextDocument_text")
cognify_result: dict = await cognee.cognify()
dataset_id = list(cognify_result.keys())[0]

View file

@@ -147,6 +147,8 @@ def get_nodes_and_edges():
@pytest.mark.asyncio
@patch.object(LLMGateway, "acreate_structured_output", new_callable=AsyncMock)
async def main(mock_create_structured_output: AsyncMock):
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "False"
data_directory_path = os.path.join(
pathlib.Path(__file__).parent, ".data_storage/test_delete_default_graph_with_legacy_graph_1"
)
@@ -172,43 +174,7 @@ async def main(mock_create_structured_output: AsyncMock):
user = await get_default_user()
graph_engine = await get_graph_engine()
old_nodes, old_edges = get_nodes_and_edges()
old_document = old_nodes[0]
await graph_engine.add_nodes(old_nodes)
await graph_engine.add_edges(old_edges)
await index_data_points(old_nodes)
await index_graph_edges(old_edges)
await record_data_in_legacy_ledger(old_nodes, old_edges, user)
db_engine = get_relational_engine()
dataset = await create_authorized_dataset("main_dataset", user)
async with db_engine.get_async_session() as session:
old_data = Data(
id=old_document.id,
name=old_document.name,
extension="txt",
raw_data_location=old_document.raw_data_location,
external_metadata=old_document.external_metadata,
mime_type=old_document.mime_type,
owner_id=user.id,
pipeline_status={
"cognify_pipeline": {
str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED,
}
},
)
session.add(old_data)
dataset.data.append(old_data)
session.add(dataset)
await session.commit()
old_nodes, old_edges = await add_mocked_legacy_data(user)
def mock_llm_output(text_input: str, system_prompt: str, response_model):
if text_input == "test": # LLM connection test
@@ -332,6 +298,8 @@ async def main(mock_create_structured_output: AsyncMock):
after_delete_nodes_by_vector_collection[collection_name] = []
after_delete_nodes_by_vector_collection[collection_name].append(node)
vector_engine = get_vector_engine()
removed_node_ids = initial_node_ids - after_first_delete_node_ids
for collection_name, initial_nodes in initial_nodes_by_vector_collection.items():
@@ -366,6 +334,48 @@ async def main(mock_create_structured_output: AsyncMock):
assert len(vector_items) == len(query_edge_ids), "Vector items are not deleted."
async def add_mocked_legacy_data(user):
graph_engine = await get_graph_engine()
old_nodes, old_edges = get_nodes_and_edges()
old_document = old_nodes[0]
await graph_engine.add_nodes(old_nodes)
await graph_engine.add_edges(old_edges)
await index_data_points(old_nodes)
await index_graph_edges(old_edges)
await record_data_in_legacy_ledger(old_nodes, old_edges, user)
db_engine = get_relational_engine()
dataset = await create_authorized_dataset("main_dataset", user)
async with db_engine.get_async_session() as session:
old_data = Data(
id=old_document.id,
name=old_document.name,
extension="txt",
raw_data_location=old_document.raw_data_location,
external_metadata=old_document.external_metadata,
mime_type=old_document.mime_type,
owner_id=user.id,
pipeline_status={
"cognify_pipeline": {
str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED,
}
},
)
session.add(old_data)
dataset.data.append(old_data)
session.add(dataset)
await session.commit()
return old_nodes, old_edges
if __name__ == "__main__":
import asyncio

View file

@@ -146,6 +146,8 @@ def get_nodes_and_edges():
@pytest.mark.asyncio
@patch.object(LLMGateway, "acreate_structured_output", new_callable=AsyncMock)
async def main(mock_create_structured_output: AsyncMock):
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "False"
data_directory_path = os.path.join(
pathlib.Path(__file__).parent, ".data_storage/test_delete_default_graph_with_legacy_graph_2"
)
@@ -171,43 +173,7 @@ async def main(mock_create_structured_output: AsyncMock):
user = await get_default_user()
graph_engine = await get_graph_engine()
old_nodes, old_edges = get_nodes_and_edges()
old_document = old_nodes[0]
await graph_engine.add_nodes(old_nodes)
await graph_engine.add_edges(old_edges)
await index_data_points(old_nodes)
await index_graph_edges(old_edges)
await record_data_in_legacy_ledger(old_nodes, old_edges, user)
db_engine = get_relational_engine()
dataset = await create_authorized_dataset("main_dataset", user)
async with db_engine.get_async_session() as session:
old_data = Data(
id=old_document.id,
name=old_document.name,
extension="txt",
raw_data_location=old_document.raw_data_location,
external_metadata=old_document.external_metadata,
mime_type=old_document.mime_type,
owner_id=user.id,
pipeline_status={
"cognify_pipeline": {
str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED,
}
},
)
session.add(old_data)
dataset.data.append(old_data)
session.add(dataset)
await session.commit()
old_document, old_nodes, old_edges = await add_mocked_legacy_data(user)
def mock_llm_output(text_input: str, system_prompt: str, response_model):
if text_input == "test": # LLM connection test
@@ -328,6 +294,8 @@ async def main(mock_create_structured_output: AsyncMock):
after_delete_nodes_by_vector_collection[collection_name] = []
after_delete_nodes_by_vector_collection[collection_name].append(node)
vector_engine = get_vector_engine()
removed_node_ids = initial_node_ids - after_first_delete_node_ids
for collection_name, initial_nodes in initial_nodes_by_vector_collection.items():
@@ -363,6 +331,48 @@ async def main(mock_create_structured_output: AsyncMock):
assert len(vector_items) == len(query_edge_ids), "Vector items are not deleted."
async def add_mocked_legacy_data(user):
graph_engine = await get_graph_engine()
old_nodes, old_edges = get_nodes_and_edges()
old_document = old_nodes[0]
await graph_engine.add_nodes(old_nodes)
await graph_engine.add_edges(old_edges)
await index_data_points(old_nodes)
await index_graph_edges(old_edges)
await record_data_in_legacy_ledger(old_nodes, old_edges, user)
db_engine = get_relational_engine()
dataset = await create_authorized_dataset("main_dataset", user)
async with db_engine.get_async_session() as session:
old_data = Data(
id=old_document.id,
name=old_document.name,
extension="txt",
raw_data_location=old_document.raw_data_location,
external_metadata=old_document.external_metadata,
mime_type=old_document.mime_type,
owner_id=user.id,
pipeline_status={
"cognify_pipeline": {
str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED,
}
},
)
session.add(old_data)
dataset.data.append(old_data)
session.add(dataset)
await session.commit()
return old_document, old_nodes, old_edges
if __name__ == "__main__":
import asyncio