fix: use new delete in API and cli
This commit is contained in:
parent
c438be70c3
commit
dc64dd5ffa
33 changed files with 253 additions and 988 deletions
|
|
@ -196,7 +196,7 @@ class CogneeClient:
|
|||
)
|
||||
return results
|
||||
|
||||
async def delete(self, data_id: UUID, dataset_id: UUID, mode: str = "soft") -> Dict[str, Any]:
|
||||
async def delete(self, data_id: UUID, dataset_id: UUID) -> Dict[str, Any]:
|
||||
"""
|
||||
Delete data from a dataset.
|
||||
|
||||
|
|
@ -206,8 +206,6 @@ class CogneeClient:
|
|||
ID of the data to delete
|
||||
dataset_id : UUID
|
||||
ID of the dataset containing the data
|
||||
mode : str
|
||||
Deletion mode ("soft" or "hard")
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
@ -216,12 +214,9 @@ class CogneeClient:
|
|||
"""
|
||||
if self.use_api:
|
||||
# API mode: Make HTTP request
|
||||
endpoint = f"{self.api_url}/api/v1/delete"
|
||||
params = {"data_id": str(data_id), "dataset_id": str(dataset_id), "mode": mode}
|
||||
endpoint = f"{self.api_url}/api/v1/datasets/{str(dataset_id)}/data/{str(data_id)}"
|
||||
|
||||
response = await self.client.delete(
|
||||
endpoint, params=params, headers=self._get_headers()
|
||||
)
|
||||
response = await self.client.delete(endpoint, headers=self._get_headers())
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
else:
|
||||
|
|
@ -230,10 +225,11 @@ class CogneeClient:
|
|||
|
||||
with redirect_stdout(sys.stderr):
|
||||
user = await get_default_user()
|
||||
result = await self.cognee.delete(
|
||||
data_id=data_id, dataset_id=dataset_id, mode=mode, user=user
|
||||
await self.cognee.datasets.delete_data(
|
||||
dataset_id=dataset_id,
|
||||
data_id=data_id,
|
||||
user_id=user.id,
|
||||
)
|
||||
return result
|
||||
|
||||
async def prune_data(self) -> Dict[str, Any]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@ from cognee.shared.logging_utils import setup_logging
|
|||
logger = setup_logging()
|
||||
|
||||
from .api.v1.add import add
|
||||
from .api.v1.delete import delete
|
||||
from .api.v1.cognify import cognify
|
||||
from .modules.memify import memify
|
||||
from .api.v1.update import update
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_
|
|||
from cognee.api.v1.search.routers import get_search_router
|
||||
from cognee.api.v1.memify.routers import get_memify_router
|
||||
from cognee.api.v1.add.routers import get_add_router
|
||||
from cognee.api.v1.delete.routers import get_delete_router
|
||||
from cognee.api.v1.responses.routers import get_responses_router
|
||||
from cognee.api.v1.sync.routers import get_sync_router
|
||||
from cognee.api.v1.update.routers import get_update_router
|
||||
|
|
@ -262,8 +261,6 @@ app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["sett
|
|||
|
||||
app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
|
||||
|
||||
app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])
|
||||
|
||||
app.include_router(get_update_router(), prefix="/api/v1/update", tags=["update"])
|
||||
|
||||
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ import os
|
|||
import pathlib
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from pydantic import BaseModel
|
||||
from cognee.shared.logging_utils import get_logger, setup_logging
|
||||
from cognee.modules.observability.get_observe import get_observe
|
||||
|
||||
|
|
@ -83,15 +86,19 @@ async def run_code_graph_pipeline(
|
|||
async with db_engine.get_async_session() as session:
|
||||
dataset = await create_dataset(dataset_name, user, session)
|
||||
|
||||
class RepoData(BaseModel):
|
||||
id: UUID
|
||||
repo_path: str
|
||||
|
||||
data = RepoData(id=uuid4(), repo_path=repo_path)
|
||||
|
||||
if include_docs:
|
||||
non_code_pipeline_run = run_tasks(
|
||||
non_code_tasks, dataset, repo_path, user, "cognify_pipeline"
|
||||
)
|
||||
non_code_pipeline_run = run_tasks(non_code_tasks, dataset, data, user, "cognify_pipeline")
|
||||
async for run_status in non_code_pipeline_run:
|
||||
yield run_status
|
||||
|
||||
async for run_status in run_tasks(
|
||||
tasks, dataset, repo_path, user, "cognify_code_pipeline", incremental_loading=False
|
||||
tasks, dataset, data, user, "cognify_code_pipeline", incremental_loading=False
|
||||
):
|
||||
yield run_status
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
from .datasets import datasets
|
||||
|
|
@ -1,4 +1,8 @@
|
|||
from typing import Optional
|
||||
from uuid import UUID
|
||||
from cognee.modules.data.exceptions.exceptions import UnauthorizedDataAccessError
|
||||
from cognee.modules.data.methods import get_datasets
|
||||
from cognee.modules.graph.methods import delete_data_nodes_and_edges, delete_dataset_nodes_and_edges
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.modules.ingestion import discover_directory_datasets
|
||||
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
|
||||
|
|
@ -31,10 +35,53 @@ class datasets:
|
|||
return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")
|
||||
|
||||
@staticmethod
|
||||
async def delete_dataset(dataset_id: str):
|
||||
async def delete_dataset(dataset_id: UUID, user_id: Optional[UUID] = None):
|
||||
from cognee.modules.data.methods import get_dataset, delete_dataset
|
||||
|
||||
user = await get_default_user()
|
||||
if not user_id:
|
||||
user = await get_default_user()
|
||||
user_id = user.id
|
||||
|
||||
dataset = await get_dataset(user.id, dataset_id)
|
||||
|
||||
if not dataset:
|
||||
raise UnauthorizedDataAccessError(f"Dataset {dataset_id} not accessible.")
|
||||
|
||||
await delete_dataset_nodes_and_edges(dataset_id)
|
||||
|
||||
return await delete_dataset(dataset)
|
||||
|
||||
@staticmethod
|
||||
async def delete_data(dataset_id: UUID, data_id: UUID, user_id: Optional[UUID] = None):
|
||||
from cognee.modules.data.methods import delete_data, get_data
|
||||
|
||||
if not user_id:
|
||||
user = await get_default_user()
|
||||
user_id = user.id
|
||||
|
||||
data = await get_data(user_id, data_id)
|
||||
|
||||
if not data:
|
||||
# If data is not found in the system, user is using a custom graph model.
|
||||
await delete_data_nodes_and_edges(dataset_id, data_id)
|
||||
return
|
||||
|
||||
data_datasets = data.datasets
|
||||
|
||||
if not data or not any([dataset.id == dataset_id for dataset in data_datasets]):
|
||||
raise UnauthorizedDataAccessError(f"Data {data_id} not accessible.")
|
||||
|
||||
await delete_data_nodes_and_edges(dataset_id, data.id)
|
||||
|
||||
await delete_data(data)
|
||||
|
||||
@staticmethod
|
||||
async def delete_all(user_id: Optional[UUID] = None):
|
||||
if not user_id:
|
||||
user = await get_default_user()
|
||||
user_id = user.id
|
||||
|
||||
user_datasets = await get_datasets(user_id)
|
||||
|
||||
for dataset in user_datasets:
|
||||
await datasets.delete_dataset(dataset.id, user_id)
|
||||
|
|
|
|||
|
|
@ -9,12 +9,13 @@ from fastapi.encoders import jsonable_encoder
|
|||
from fastapi import HTTPException, Query, Depends
|
||||
from fastapi.responses import JSONResponse, FileResponse
|
||||
|
||||
from cognee import datasets
|
||||
from cognee.api.DTO import InDTO, OutDTO
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||
from cognee.modules.data.methods import create_dataset, get_datasets_by_name
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError
|
||||
from cognee.api.v1.exceptions import DataNotFoundError
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.modules.users.methods import get_authenticated_user
|
||||
from cognee.modules.users.permissions.methods import (
|
||||
|
|
@ -175,6 +176,20 @@ def get_datasets_router() -> APIRouter:
|
|||
detail=f"Error creating dataset: {str(error)}",
|
||||
) from error
|
||||
|
||||
@router.delete("")
|
||||
async def delete_all(user: User = Depends(get_authenticated_user)):
|
||||
"""
|
||||
Delete all user's data.
|
||||
|
||||
This endpoint permanently deletes all datasets that user created and all its associated data.
|
||||
The user must have delete permissions on the dataset to perform this operation.
|
||||
|
||||
## Response
|
||||
No content returned on successful deletion.
|
||||
If no datasets exist for the users, nothing happens.
|
||||
"""
|
||||
await datasets.delete_all(user.id)
|
||||
|
||||
@router.delete(
|
||||
"/{dataset_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}}
|
||||
)
|
||||
|
|
@ -204,14 +219,7 @@ def get_datasets_router() -> APIRouter:
|
|||
},
|
||||
)
|
||||
|
||||
from cognee.modules.data.methods import get_dataset, delete_dataset
|
||||
|
||||
dataset = await get_dataset(user.id, dataset_id)
|
||||
|
||||
if dataset is None:
|
||||
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
|
||||
|
||||
await delete_dataset(dataset)
|
||||
await datasets.delete_dataset(dataset_id, user.id)
|
||||
|
||||
@router.delete(
|
||||
"/{dataset_id}/data/{data_id}",
|
||||
|
|
@ -249,21 +257,7 @@ def get_datasets_router() -> APIRouter:
|
|||
},
|
||||
)
|
||||
|
||||
from cognee.modules.data.methods import get_data, delete_data
|
||||
from cognee.modules.data.methods import get_dataset
|
||||
|
||||
# Check if user has permission to access dataset and data by trying to get the dataset
|
||||
dataset = await get_dataset(user.id, dataset_id)
|
||||
|
||||
if dataset is None:
|
||||
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
|
||||
|
||||
data = await get_data(user.id, data_id)
|
||||
|
||||
if data is None:
|
||||
raise DataNotFoundError(message=f"Data ({str(data_id)}) not found.")
|
||||
|
||||
await delete_data(data)
|
||||
await datasets.delete_data(dataset_id, data_id, user.id)
|
||||
|
||||
@router.get("/{dataset_id}/graph", response_model=GraphDTO)
|
||||
async def get_dataset_graph(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
from .delete import delete
|
||||
|
|
@ -1,269 +0,0 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.sql import delete as sql_delete
|
||||
|
||||
from cognee.infrastructure.engine import DataPoint
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
from cognee.modules.users.models import User
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.data.models import Data, DatasetData, Dataset
|
||||
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||
from cognee.context_global_variables import set_database_global_context_variables
|
||||
|
||||
from cognee.api.v1.exceptions import (
|
||||
DocumentNotFoundError,
|
||||
DatasetNotFoundError,
|
||||
DocumentSubgraphNotFoundError,
|
||||
)
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def delete(
|
||||
data_id: UUID,
|
||||
dataset_id: UUID,
|
||||
mode: str = "soft",
|
||||
user: User = None,
|
||||
):
|
||||
"""Delete data by its ID from the specified dataset.
|
||||
|
||||
Args:
|
||||
data_id: The UUID of the data to delete
|
||||
dataset_id: The UUID of the dataset containing the data
|
||||
mode: "soft" (default) or "hard" - hard mode also deletes degree-one entity nodes
|
||||
user: User doing the operation, if none default user will be used.
|
||||
|
||||
Returns:
|
||||
Dict with deletion results
|
||||
|
||||
Raises:
|
||||
DocumentNotFoundError: If data is not found
|
||||
DatasetNotFoundError: If dataset is not found
|
||||
PermissionDeniedError: If user doesn't have delete permission on dataset
|
||||
"""
|
||||
if user is None:
|
||||
user = await get_default_user()
|
||||
|
||||
# Verify user has delete permission on the dataset
|
||||
dataset_list = await get_authorized_existing_datasets([dataset_id], "delete", user)
|
||||
|
||||
if not dataset_list:
|
||||
raise DatasetNotFoundError(f"Dataset not found or access denied: {dataset_id}")
|
||||
|
||||
dataset = dataset_list[0]
|
||||
|
||||
# Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
|
||||
|
||||
await set_database_global_context_variables(dataset.id, dataset.owner_id)
|
||||
|
||||
# Get the data record and verify it exists and belongs to the dataset
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
# Check if data exists
|
||||
data_point = (
|
||||
await session.execute(select(Data).filter(Data.id == data_id))
|
||||
).scalar_one_or_none()
|
||||
|
||||
if data_point is None:
|
||||
raise DocumentNotFoundError(f"Data not found with ID: {data_id}")
|
||||
|
||||
# Check if data belongs to the specified dataset
|
||||
dataset_data_link = (
|
||||
await session.execute(
|
||||
select(DatasetData).filter(
|
||||
DatasetData.data_id == data_id, DatasetData.dataset_id == dataset_id
|
||||
)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
|
||||
if dataset_data_link is None:
|
||||
raise DocumentNotFoundError(f"Data {data_id} not found in dataset {dataset_id}")
|
||||
|
||||
# Get the content hash for deletion
|
||||
data_id = str(data_point.id)
|
||||
|
||||
# Use the existing comprehensive deletion logic
|
||||
return await delete_single_document(data_id, dataset.id, mode)
|
||||
|
||||
|
||||
async def delete_single_document(data_id: str, dataset_id: UUID = None, mode: str = "soft"):
|
||||
"""Delete a single document by its content hash."""
|
||||
|
||||
# Delete from graph database
|
||||
deletion_result = await delete_document_subgraph(data_id, mode)
|
||||
|
||||
logger.info(f"Deletion result: {deletion_result}")
|
||||
|
||||
# Get the deleted node IDs and convert to UUID
|
||||
deleted_node_ids = []
|
||||
for node_id in deletion_result["deleted_node_ids"]:
|
||||
try:
|
||||
# Handle both string and UUID formats
|
||||
if isinstance(node_id, str):
|
||||
# Remove any hyphens if present
|
||||
node_id = node_id.replace("-", "")
|
||||
deleted_node_ids.append(UUID(node_id))
|
||||
else:
|
||||
deleted_node_ids.append(node_id)
|
||||
except Exception as e:
|
||||
logger.error(f"Error converting node ID {node_id} to UUID: {e}")
|
||||
continue
|
||||
|
||||
# Delete from vector database
|
||||
vector_engine = get_vector_engine()
|
||||
|
||||
# Determine vector collections dynamically
|
||||
subclasses = get_all_subclasses(DataPoint)
|
||||
vector_collections = []
|
||||
|
||||
for subclass in subclasses:
|
||||
index_fields = subclass.model_fields["metadata"].default.get("index_fields", [])
|
||||
for field_name in index_fields:
|
||||
vector_collections.append(f"{subclass.__name__}_{field_name}")
|
||||
|
||||
# If no collections found, use default collections
|
||||
if not vector_collections:
|
||||
vector_collections = [
|
||||
"DocumentChunk_text",
|
||||
"EdgeType_relationship_name",
|
||||
"EntityType_name",
|
||||
"Entity_name",
|
||||
"TextDocument_name",
|
||||
"TextSummary_text",
|
||||
]
|
||||
|
||||
# Delete records from each vector collection that exists
|
||||
for collection in vector_collections:
|
||||
if await vector_engine.has_collection(collection):
|
||||
await vector_engine.delete_data_points(
|
||||
collection, [str(node_id) for node_id in deleted_node_ids]
|
||||
)
|
||||
|
||||
# Delete from relational database
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
# Update graph_relationship_ledger with deleted_at timestamps
|
||||
from sqlalchemy import update, and_, or_
|
||||
from datetime import datetime
|
||||
from cognee.modules.data.models.graph_relationship_ledger import GraphRelationshipLedger
|
||||
|
||||
update_stmt = (
|
||||
update(GraphRelationshipLedger)
|
||||
.where(
|
||||
or_(
|
||||
GraphRelationshipLedger.source_node_id.in_(deleted_node_ids),
|
||||
GraphRelationshipLedger.destination_node_id.in_(deleted_node_ids),
|
||||
)
|
||||
)
|
||||
.values(deleted_at=datetime.now())
|
||||
)
|
||||
await session.execute(update_stmt)
|
||||
|
||||
# Get the data point
|
||||
data_point = (
|
||||
await session.execute(select(Data).filter(Data.id == UUID(data_id)))
|
||||
).scalar_one_or_none()
|
||||
|
||||
if data_point is None:
|
||||
raise DocumentNotFoundError(
|
||||
f"Document not found in relational DB with data id: {data_id}"
|
||||
)
|
||||
|
||||
doc_id = data_point.id
|
||||
|
||||
# Get the dataset
|
||||
dataset = (
|
||||
await session.execute(select(Dataset).filter(Dataset.id == dataset_id))
|
||||
).scalar_one_or_none()
|
||||
|
||||
if dataset is None:
|
||||
raise DatasetNotFoundError(f"Dataset not found: {dataset_id}")
|
||||
|
||||
# Delete from dataset_data table
|
||||
dataset_delete_stmt = sql_delete(DatasetData).where(
|
||||
DatasetData.data_id == doc_id, DatasetData.dataset_id == dataset.id
|
||||
)
|
||||
await session.execute(dataset_delete_stmt)
|
||||
|
||||
# Check if the document is in any other datasets
|
||||
remaining_datasets = (
|
||||
await session.execute(select(DatasetData).filter(DatasetData.data_id == doc_id))
|
||||
).scalar_one_or_none()
|
||||
|
||||
# If the document is not in any other datasets, delete it from the data table
|
||||
if remaining_datasets is None:
|
||||
data_delete_stmt = sql_delete(Data).where(Data.id == doc_id)
|
||||
await session.execute(data_delete_stmt)
|
||||
|
||||
await session.commit()
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Document deleted from both graph and relational databases",
|
||||
"graph_deletions": deletion_result["deleted_counts"],
|
||||
"data_id": data_id,
|
||||
"dataset": dataset_id,
|
||||
"deleted_node_ids": [
|
||||
str(node_id) for node_id in deleted_node_ids
|
||||
], # Convert back to strings for response
|
||||
}
|
||||
|
||||
|
||||
async def delete_document_subgraph(document_id: str, mode: str = "soft"):
|
||||
"""Delete a document and all its related nodes in the correct order."""
|
||||
graph_db = await get_graph_engine()
|
||||
subgraph = await graph_db.get_document_subgraph(document_id)
|
||||
if not subgraph:
|
||||
raise DocumentSubgraphNotFoundError(f"Document not found with id: {document_id}")
|
||||
|
||||
# Delete in the correct order to maintain graph integrity
|
||||
deletion_order = [
|
||||
("orphan_entities", "orphaned entities"),
|
||||
("orphan_types", "orphaned entity types"),
|
||||
(
|
||||
"made_from_nodes",
|
||||
"made_from nodes",
|
||||
), # Move before chunks since summaries are connected to chunks
|
||||
("chunks", "document chunks"),
|
||||
("document", "document"),
|
||||
]
|
||||
|
||||
deleted_counts = {}
|
||||
deleted_node_ids = []
|
||||
for key, description in deletion_order:
|
||||
nodes = subgraph[key]
|
||||
if nodes:
|
||||
for node in nodes:
|
||||
node_id = node["id"]
|
||||
await graph_db.delete_node(node_id)
|
||||
deleted_node_ids.append(node_id)
|
||||
deleted_counts[description] = len(nodes)
|
||||
|
||||
# If hard mode, also delete degree-one nodes
|
||||
if mode == "hard":
|
||||
# Get and delete degree one entity nodes
|
||||
degree_one_entity_nodes = await graph_db.get_degree_one_nodes("Entity")
|
||||
for node in degree_one_entity_nodes:
|
||||
await graph_db.delete_node(node["id"])
|
||||
deleted_node_ids.append(node["id"])
|
||||
deleted_counts["degree_one_entities"] = deleted_counts.get("degree_one_entities", 0) + 1
|
||||
|
||||
# Get and delete degree one entity types
|
||||
degree_one_entity_types = await graph_db.get_degree_one_nodes("EntityType")
|
||||
for node in degree_one_entity_types:
|
||||
await graph_db.delete_node(node["id"])
|
||||
deleted_node_ids.append(node["id"])
|
||||
deleted_counts["degree_one_types"] = deleted_counts.get("degree_one_types", 0) + 1
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"deleted_counts": deleted_counts,
|
||||
"document_id": document_id,
|
||||
"deleted_node_ids": deleted_node_ids,
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
from .get_delete_router import get_delete_router
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
from fastapi import Depends
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi import APIRouter
|
||||
from uuid import UUID
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.modules.users.methods import get_authenticated_user
|
||||
from cognee.shared.utils import send_telemetry
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def get_delete_router() -> APIRouter:
|
||||
router = APIRouter()
|
||||
|
||||
@router.delete("", response_model=None)
|
||||
async def delete(
|
||||
data_id: UUID,
|
||||
dataset_id: UUID,
|
||||
mode: str = "soft",
|
||||
user: User = Depends(get_authenticated_user),
|
||||
):
|
||||
"""Delete data by its ID from the specified dataset.
|
||||
|
||||
Args:
|
||||
data_id: The UUID of the data to delete
|
||||
dataset_id: The UUID of the dataset containing the data
|
||||
mode: "soft" (default) or "hard" - hard mode also deletes degree-one entity nodes
|
||||
user: Authenticated user
|
||||
|
||||
Returns:
|
||||
JSON response indicating success or failure
|
||||
|
||||
"""
|
||||
send_telemetry(
|
||||
"Delete API Endpoint Invoked",
|
||||
user.id,
|
||||
additional_properties={
|
||||
"endpoint": "DELETE /v1/delete",
|
||||
"dataset_id": str(dataset_id),
|
||||
"data_id": str(data_id),
|
||||
},
|
||||
)
|
||||
|
||||
from cognee.api.v1.delete import delete as cognee_delete
|
||||
|
||||
try:
|
||||
result = await cognee_delete(
|
||||
data_id=data_id,
|
||||
dataset_id=dataset_id,
|
||||
mode=mode,
|
||||
user=user,
|
||||
)
|
||||
return result
|
||||
|
||||
except Exception as error:
|
||||
logger.error(f"Error during deletion by data_id: {str(error)}")
|
||||
return JSONResponse(status_code=409, content={"error": str(error)})
|
||||
|
||||
return router
|
||||
|
|
@ -2,9 +2,9 @@ from uuid import UUID
|
|||
from typing import Union, BinaryIO, List, Optional
|
||||
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.api.v1.delete import delete
|
||||
from cognee.api.v1.add import add
|
||||
from cognee.api.v1.cognify import cognify
|
||||
from cognee.api.v1.datasets import datasets
|
||||
|
||||
|
||||
async def update(
|
||||
|
|
@ -72,10 +72,10 @@ async def update(
|
|||
- Processing status and any errors
|
||||
- Execution timestamps and metadata
|
||||
"""
|
||||
await delete(
|
||||
data_id=data_id,
|
||||
await datasets.delete_data(
|
||||
dataset_id=dataset_id,
|
||||
user=user,
|
||||
data_id=data_id,
|
||||
user_id=user.id,
|
||||
)
|
||||
|
||||
await add(
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
from cognee.cli.reference import SupportsCliCommand
|
||||
from cognee.cli import DEFAULT_DOCS_URL
|
||||
import cognee.cli.echo as fmt
|
||||
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
|
||||
from cognee.modules.data.methods import get_datasets_by_name
|
||||
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
|
||||
|
||||
|
||||
|
|
@ -38,8 +38,10 @@ Be careful with deletion operations as they are irreversible.
|
|||
import cognee
|
||||
|
||||
# Validate arguments
|
||||
if not any([args.dataset_name, args.user_id, args.all]):
|
||||
fmt.error("Please specify what to delete: --dataset-name, --user-id, or --all")
|
||||
if not any([args.dataset_name, args.dataset_id, args.data_id, args.user_id, args.all]):
|
||||
fmt.error(
|
||||
"Please specify what to delete: --dataset-name, --dataset-id, --data-id, --user-id, or --all"
|
||||
)
|
||||
return
|
||||
|
||||
# If --force is used, skip the preview and go straight to deletion
|
||||
|
|
@ -93,12 +95,29 @@ Be careful with deletion operations as they are irreversible.
|
|||
# Run the async delete function
|
||||
async def run_delete():
|
||||
try:
|
||||
# NOTE: The underlying cognee.delete() function is currently not working as expected.
|
||||
# This is a separate bug that this preview feature helps to expose.
|
||||
if args.all:
|
||||
await cognee.delete(dataset_name=None, user_id=args.user_id)
|
||||
else:
|
||||
await cognee.delete(dataset_name=args.dataset_name, user_id=args.user_id)
|
||||
await cognee.datasets.delete_all(user_id=args.user_id)
|
||||
elif args.dataset_name or args.dataset_id:
|
||||
dataset_id = args.dataset_id
|
||||
|
||||
if args.dataset_name and not args.dataset_id:
|
||||
datasets = await get_datasets_by_name(
|
||||
args.dataset_name, user_id=args.user_id
|
||||
)
|
||||
|
||||
if not datasets:
|
||||
raise CliCommandException(
|
||||
f"No dataset found for name '{args.dataset_name}'."
|
||||
)
|
||||
|
||||
dataset = datasets[0]
|
||||
dataset_id = dataset.id
|
||||
|
||||
await cognee.datasets.delete_dataset(
|
||||
dataset_id=dataset_id, user_id=args.user_id
|
||||
)
|
||||
elif args.dataset_id and args.data_id:
|
||||
await cognee.datasets.delete_data(args.dataset_id, args.data_id)
|
||||
except Exception as e:
|
||||
raise CliCommandInnerException(f"Failed to delete: {str(e)}") from e
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ class Data(Base):
|
|||
"Dataset",
|
||||
secondary=DatasetData.__tablename__,
|
||||
back_populates="data",
|
||||
lazy="noload",
|
||||
lazy="selectin",
|
||||
cascade="all, delete",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,40 +0,0 @@
|
|||
from datetime import datetime, timezone
|
||||
from uuid import uuid5, NAMESPACE_OID
|
||||
from sqlalchemy import UUID, Column, DateTime, String, Index
|
||||
|
||||
from cognee.infrastructure.databases.relational import Base
|
||||
|
||||
|
||||
class GraphRelationshipLedger(Base):
|
||||
__tablename__ = "graph_relationship_ledger"
|
||||
|
||||
id = Column(
|
||||
UUID,
|
||||
primary_key=True,
|
||||
default=lambda: uuid5(NAMESPACE_OID, f"{datetime.now(timezone.utc).timestamp()}"),
|
||||
)
|
||||
source_node_id = Column(UUID, nullable=False)
|
||||
destination_node_id = Column(UUID, nullable=False)
|
||||
creator_function = Column(String, nullable=False)
|
||||
node_label = Column(String, nullable=True)
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
deleted_at = Column(DateTime(timezone=True), nullable=True)
|
||||
user_id = Column(UUID, nullable=True)
|
||||
|
||||
# Create indexes
|
||||
__table_args__ = (
|
||||
Index("idx_graph_relationship_id", "id"),
|
||||
Index("idx_graph_relationship_ledger_source_node_id", "source_node_id"),
|
||||
Index("idx_graph_relationship_ledger_destination_node_id", "destination_node_id"),
|
||||
)
|
||||
|
||||
def to_json(self) -> dict:
|
||||
return {
|
||||
"id": str(self.id),
|
||||
"source_node_id": str(self.parent_id),
|
||||
"destination_node_id": str(self.child_id),
|
||||
"creator_function": self.creator_function,
|
||||
"created_at": self.created_at.isoformat(),
|
||||
"deleted_at": self.deleted_at.isoformat() if self.deleted_at else None,
|
||||
"user_id": str(self.user_id),
|
||||
}
|
||||
|
|
@ -1,8 +1,16 @@
|
|||
from .get_formatted_graph_data import get_formatted_graph_data
|
||||
|
||||
from .upsert_edges import upsert_edges
|
||||
from .upsert_nodes import upsert_nodes
|
||||
|
||||
from .get_data_related_nodes import get_data_related_nodes
|
||||
from .get_data_related_edges import get_data_related_edges
|
||||
from .delete_data_related_nodes import delete_data_related_nodes
|
||||
from .delete_data_related_edges import delete_data_related_edges
|
||||
from .get_data_related_edges import get_data_related_edges
|
||||
from .delete_data_nodes_and_edges import delete_data_nodes_and_edges
|
||||
|
||||
from .get_dataset_related_nodes import get_dataset_related_nodes
|
||||
from .get_dataset_related_edges import get_dataset_related_edges
|
||||
from .delete_dataset_related_nodes import delete_dataset_related_nodes
|
||||
from .delete_dataset_related_edges import delete_dataset_related_edges
|
||||
from .delete_dataset_nodes_and_edges import delete_dataset_nodes_and_edges
|
||||
|
|
|
|||
|
|
@ -0,0 +1,43 @@
|
|||
from uuid import UUID
|
||||
from typing import Dict, List
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
|
||||
from cognee.modules.engine.utils import generate_edge_id
|
||||
from cognee.modules.graph.methods import (
|
||||
delete_dataset_related_edges,
|
||||
delete_dataset_related_nodes,
|
||||
get_dataset_related_nodes,
|
||||
get_dataset_related_edges,
|
||||
)
|
||||
|
||||
|
||||
async def delete_dataset_nodes_and_edges(dataset_id: UUID) -> None:
|
||||
affected_nodes = await get_dataset_related_nodes(dataset_id)
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
await graph_engine.delete_nodes([str(node.slug) for node in affected_nodes])
|
||||
|
||||
affected_vector_collections: Dict[str, List] = {}
|
||||
for node in affected_nodes:
|
||||
for indexed_field in node.indexed_fields:
|
||||
collection_name = f"{node.type}_{indexed_field}"
|
||||
if collection_name not in affected_vector_collections:
|
||||
affected_vector_collections[collection_name] = []
|
||||
affected_vector_collections[collection_name].append(node)
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
for affected_collection, affected_nodes in affected_vector_collections.items():
|
||||
await vector_engine.delete_data_points(
|
||||
affected_collection, [node.id for node in affected_nodes]
|
||||
)
|
||||
|
||||
affected_relationships = await get_dataset_related_edges(dataset_id)
|
||||
|
||||
await vector_engine.delete_data_points(
|
||||
"EdgeType_relationship_name",
|
||||
[generate_edge_id(edge.relationship_name) for edge in affected_relationships],
|
||||
)
|
||||
|
||||
await delete_dataset_related_nodes(dataset_id)
|
||||
await delete_dataset_related_edges(dataset_id)
|
||||
13
cognee/modules/graph/methods/delete_dataset_related_edges.py
Normal file
13
cognee/modules/graph/methods/delete_dataset_related_edges.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cognee.infrastructure.databases.relational import with_async_session
|
||||
from cognee.modules.graph.models import Edge
|
||||
|
||||
|
||||
@with_async_session
|
||||
async def delete_dataset_related_edges(dataset_id: UUID, session: AsyncSession):
|
||||
nodes = (await session.scalars(select(Edge).where(Edge.dataset_id == dataset_id))).all()
|
||||
|
||||
await session.execute(delete(Edge).where(Edge.id.in_([node.id for node in nodes])))
|
||||
13
cognee/modules/graph/methods/delete_dataset_related_nodes.py
Normal file
13
cognee/modules/graph/methods/delete_dataset_related_nodes.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cognee.infrastructure.databases.relational import with_async_session
|
||||
from cognee.modules.graph.models import Node
|
||||
|
||||
|
||||
@with_async_session
async def delete_dataset_related_nodes(dataset_id: UUID, session: AsyncSession) -> None:
    """Delete every graph ``Node`` row that belongs to the given dataset.

    Parameters
    ----------
    dataset_id : UUID
        ID of the dataset whose node rows should be removed.
    session : AsyncSession
        Database session (injected by ``with_async_session`` when omitted
        by the caller).
    """
    # Single DELETE filtered on dataset_id; avoids loading all rows first
    # and deleting via an ``id IN (...)`` list (O(n) memory, two round trips).
    await session.execute(delete(Node).where(Node.dataset_id == dataset_id))
|
||||
15
cognee/modules/graph/methods/get_dataset_related_edges.py
Normal file
15
cognee/modules/graph/methods/get_dataset_related_edges.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cognee.infrastructure.databases.relational import with_async_session
|
||||
from cognee.modules.graph.models import Edge
|
||||
|
||||
|
||||
@with_async_session
async def get_dataset_related_edges(dataset_id: UUID, session: AsyncSession):
    """Return the dataset's ``Edge`` rows, de-duplicated on relationship_name.

    NOTE(review): ``distinct(Edge.relationship_name)`` compiles to
    ``DISTINCT ON`` — a PostgreSQL-specific construct; confirm the
    configured relational backend supports it.
    """
    query_statement = (
        select(Edge)
        .where(Edge.dataset_id == dataset_id)
        .distinct(Edge.relationship_name)
    )
    distinct_edges = await session.scalars(query_statement)
    return distinct_edges.all()
|
||||
14
cognee/modules/graph/methods/get_dataset_related_nodes.py
Normal file
14
cognee/modules/graph/methods/get_dataset_related_nodes.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
from uuid import UUID
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cognee.infrastructure.databases.relational import with_async_session
|
||||
from cognee.modules.graph.models import Node
|
||||
|
||||
|
||||
@with_async_session
async def get_dataset_related_nodes(dataset_id: UUID, session: AsyncSession):
    """Fetch every graph ``Node`` row recorded for the given dataset."""
    result = await session.scalars(select(Node).where(Node.dataset_id == dataset_id))
    return result.all()
|
||||
|
|
@ -20,7 +20,7 @@ class Edge(Base):
|
|||
|
||||
data_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
|
||||
|
||||
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
|
||||
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
|
||||
|
||||
source_node_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
|
||||
destination_node_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ class Node(Base):
|
|||
|
||||
data_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
|
||||
|
||||
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
|
||||
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
|
||||
|
||||
label: Mapped[str] = mapped_column(String(255))
|
||||
type: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||
|
|
|
|||
|
|
@ -1,54 +0,0 @@
|
|||
from datetime import datetime, timezone
|
||||
from uuid import UUID
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from cognee.modules.data.models import graph_relationship_ledger
|
||||
from cognee.modules.users.models import User
|
||||
|
||||
|
||||
async def create_relationship(
    session: AsyncSession,
    source_node_id: UUID,
    destination_node_id: UUID,
    creator_function: str,
    user: User,
) -> None:
    """Record a new graph relationship in the relationship ledger.

    Args:
        session: Database session the ledger row is added to.
        source_node_id: ID of the source node.
        destination_node_id: ID of the destination node.
        creator_function: Name of the function creating the relationship.
        user: User creating the relationship (only ``user.id`` is stored).
    """
    ledger_entry = graph_relationship_ledger(
        source_node_id=source_node_id,
        destination_node_id=destination_node_id,
        creator_function=creator_function,
        user_id=user.id,
    )
    session.add(ledger_entry)
    # Flush (not commit) so the row gets an identity within the caller's
    # still-open transaction.
    await session.flush()
|
||||
|
||||
|
||||
async def delete_relationship(
    session: AsyncSession,
    source_node_id: UUID,
    destination_node_id: UUID,
    user: User,
) -> None:
    """Soft-delete a ledger relationship by stamping ``deleted_at``.

    Args:
        session: Database session used for the lookup and update.
        source_node_id: ID of the source node.
        destination_node_id: ID of the destination node.
        user: User deleting the relationship.  NOTE(review): currently
            unused by the body — kept for interface compatibility; consider
            auditing the deletion with it.
    """
    # NOTE(review): assumes (source_node_id, destination_node_id) is the
    # composite primary key of graph_relationship_ledger — confirm against
    # the model definition.
    ledger_entry = await session.get(
        graph_relationship_ledger, (source_node_id, destination_node_id)
    )
    if ledger_entry is None:
        # No matching row: nothing to mark (original is silently best-effort).
        return
    ledger_entry.deleted_at = datetime.now(timezone.utc)
    session.add(ledger_entry)
    await session.flush()
|
||||
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
from typing import List
|
||||
|
||||
|
||||
async def get_non_py_files(repo_path):
|
||||
async def get_non_py_files(repo_data: List):
|
||||
"""
|
||||
Get files that are not .py files and their contents.
|
||||
|
||||
|
|
@ -13,13 +14,15 @@ async def get_non_py_files(repo_path):
|
|||
Parameters:
|
||||
-----------
|
||||
|
||||
- repo_path: The file system path to the repository to scan for non-Python files.
|
||||
- repo_data: The data object containing file system path to the repository to scan for non-Python files.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
A list of file paths that are not Python files and meet the specified criteria.
|
||||
"""
|
||||
repo_path = repo_data[0].repo_path
|
||||
|
||||
if not os.path.exists(repo_path):
|
||||
return {}
|
||||
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ def run_coroutine(coroutine_func, *args, **kwargs):
|
|||
|
||||
|
||||
async def get_repo_file_dependencies(
|
||||
repo_path: str,
|
||||
repo_data: List,
|
||||
detailed_extraction: bool = False,
|
||||
supported_languages: list = None,
|
||||
excluded_paths: Optional[List[str]] = None,
|
||||
|
|
@ -156,8 +156,7 @@ async def get_repo_file_dependencies(
|
|||
- supported_languages (list | None): Subset of languages to include; if None, use defaults.
|
||||
"""
|
||||
|
||||
if isinstance(repo_path, list) and len(repo_path) == 1:
|
||||
repo_path = repo_path[0]
|
||||
repo_path: str = repo_data[0].repo_path
|
||||
|
||||
if not os.path.exists(repo_path):
|
||||
raise FileNotFoundError(f"Repository path {repo_path} does not exist.")
|
||||
|
|
|
|||
|
|
@ -1,316 +0,0 @@
|
|||
import os
|
||||
import pathlib
|
||||
import cognee
|
||||
from uuid import uuid4
|
||||
from cognee.modules.users.exceptions import PermissionDeniedError
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.users.methods import get_default_user, create_user
|
||||
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
|
||||
from cognee.modules.data.methods import get_dataset_data, get_datasets_by_name
|
||||
from cognee.api.v1.exceptions import DocumentNotFoundError, DatasetNotFoundError
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def main():
|
||||
# Enable permissions feature
|
||||
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"
|
||||
|
||||
# Clean up test directories before starting
|
||||
data_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_delete_by_id")
|
||||
).resolve()
|
||||
)
|
||||
cognee_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_delete_by_id")
|
||||
).resolve()
|
||||
)
|
||||
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
cognee.config.system_root_directory(cognee_directory_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
# Setup database and tables
|
||||
from cognee.modules.engine.operations.setup import setup
|
||||
|
||||
await setup()
|
||||
|
||||
print("🧪 Testing Delete by ID and Dataset Data Endpoints")
|
||||
print("=" * 60)
|
||||
|
||||
# Get the default user first
|
||||
default_user = await get_default_user()
|
||||
|
||||
# Test data
|
||||
text_1 = """
|
||||
Apple Inc. is an American multinational technology company that specializes in consumer electronics,
|
||||
software, and online services. Apple is the world's largest technology company by revenue and,
|
||||
since January 2021, the world's most valuable company.
|
||||
"""
|
||||
|
||||
text_2 = """
|
||||
Microsoft Corporation is an American multinational technology corporation which produces computer software,
|
||||
consumer electronics, personal computers, and related services. Its best known software products are the
|
||||
Microsoft Windows line of operating systems and the Microsoft Office suite.
|
||||
"""
|
||||
|
||||
text_3 = """
|
||||
Google LLC is an American multinational technology company that specializes in Internet-related services and products,
|
||||
which include online advertising technologies, search engine, cloud computing, software, and hardware. Google has been
|
||||
referred to as the most powerful company in the world and one of the world's most valuable brands.
|
||||
"""
|
||||
|
||||
# Test 1: Setup data and datasets
|
||||
print("\n📝 Test 1: Setting up test data and datasets")
|
||||
|
||||
# Add data for default user
|
||||
await cognee.add([text_1], dataset_name="tech_companies_1", user=default_user)
|
||||
|
||||
# Create test user first for the second dataset
|
||||
test_user = await create_user("test_user_delete@gmail.com", "test@example.com")
|
||||
|
||||
# Add data for test user
|
||||
await cognee.add([text_2], dataset_name="tech_companies_2", user=test_user)
|
||||
|
||||
# Create third user for isolation testing
|
||||
isolation_user = await create_user("isolation_user@gmail.com", "isolation@example.com")
|
||||
|
||||
# Add data for isolation user (should remain unaffected by other deletions)
|
||||
await cognee.add([text_3], dataset_name="tech_companies_3", user=isolation_user)
|
||||
|
||||
tst = await cognee.cognify(["tech_companies_1"], user=default_user)
|
||||
tst2 = await cognee.cognify(["tech_companies_2"], user=test_user)
|
||||
tst3 = await cognee.cognify(["tech_companies_3"], user=isolation_user)
|
||||
print("tst", tst)
|
||||
print("tst2", tst2)
|
||||
print("tst3", tst3)
|
||||
|
||||
# Extract dataset_ids from cognify results
|
||||
def extract_dataset_id_from_cognify(cognify_result):
|
||||
"""Extract dataset_id from cognify output dictionary"""
|
||||
for dataset_id, pipeline_result in cognify_result.items():
|
||||
return dataset_id # Return the first (and likely only) dataset_id
|
||||
return None
|
||||
|
||||
# Get dataset IDs from cognify results
|
||||
dataset_id_1 = extract_dataset_id_from_cognify(tst)
|
||||
dataset_id_2 = extract_dataset_id_from_cognify(tst2)
|
||||
dataset_id_3 = extract_dataset_id_from_cognify(tst3)
|
||||
|
||||
print(f"📋 Extracted dataset_id from tst: {dataset_id_1}")
|
||||
print(f"📋 Extracted dataset_id from tst2: {dataset_id_2}")
|
||||
print(f"📋 Extracted dataset_id from tst3: {dataset_id_3}")
|
||||
|
||||
# Get dataset data for deletion testing
|
||||
dataset_data_1 = await get_dataset_data(dataset_id_1)
|
||||
dataset_data_2 = await get_dataset_data(dataset_id_2)
|
||||
dataset_data_3 = await get_dataset_data(dataset_id_3)
|
||||
|
||||
print(f"📊 Dataset 1 contains {len(dataset_data_1)} data items")
|
||||
print(f"📊 Dataset 2 contains {len(dataset_data_2)} data items")
|
||||
print(f"📊 Dataset 3 (isolation) contains {len(dataset_data_3)} data items")
|
||||
|
||||
# Test 2: Get data to delete from the extracted datasets
|
||||
print("\n📝 Test 2: Preparing data for deletion from cognify results")
|
||||
|
||||
# Use the first data item from each dataset for testing
|
||||
data_to_delete_id = dataset_data_1[0].id if dataset_data_1 else None
|
||||
data_to_delete_from_test_user = dataset_data_2[0].id if dataset_data_2 else None
|
||||
|
||||
# Create datasets objects for testing
|
||||
from cognee.modules.data.models import Dataset
|
||||
|
||||
default_dataset = Dataset(id=dataset_id_1, name="tech_companies_1", owner_id=default_user.id)
|
||||
|
||||
# Create dataset object for permission testing (test_user already created above)
|
||||
test_dataset = Dataset(id=dataset_id_2, name="tech_companies_2", owner_id=test_user.id)
|
||||
|
||||
print(f"🔍 Data to delete ID: {data_to_delete_id}")
|
||||
print(f"🔍 Test user data ID: {data_to_delete_from_test_user}")
|
||||
|
||||
print("\n📝 Test 3: Testing delete endpoint with proper permissions")
|
||||
|
||||
try:
|
||||
result = await cognee.delete(data_id=data_to_delete_id, dataset_id=default_dataset.id)
|
||||
print("✅ Delete successful for data owner")
|
||||
assert result["status"] == "success", "Delete should succeed for data owner"
|
||||
except Exception as e:
|
||||
print(f"❌ Unexpected error in delete test: {e}")
|
||||
raise
|
||||
|
||||
# Test 4: Test delete without permissions (should fail)
|
||||
print("\n📝 Test 4: Testing delete endpoint without permissions")
|
||||
|
||||
delete_permission_error = False
|
||||
try:
|
||||
await cognee.delete(
|
||||
data_id=data_to_delete_from_test_user,
|
||||
dataset_id=test_dataset.id,
|
||||
user=default_user, # Wrong user - should fail
|
||||
)
|
||||
except (PermissionDeniedError, DatasetNotFoundError):
|
||||
delete_permission_error = True
|
||||
print("✅ Delete correctly denied for user without permission")
|
||||
except Exception as e:
|
||||
print(f"❌ Unexpected error type: {e}")
|
||||
|
||||
assert delete_permission_error, "Delete should fail for user without permission"
|
||||
|
||||
# Test 5: Test delete with non-existent data_id
|
||||
print("\n📝 Test 5: Testing delete endpoint with non-existent data_id")
|
||||
|
||||
non_existent_data_id = uuid4()
|
||||
data_not_found_error = False
|
||||
try:
|
||||
await cognee.delete(
|
||||
data_id=non_existent_data_id, dataset_id=default_dataset.id, user=default_user
|
||||
)
|
||||
except DocumentNotFoundError:
|
||||
data_not_found_error = True
|
||||
print("✅ Delete correctly failed for non-existent data_id")
|
||||
except Exception as e:
|
||||
print(f"❌ Unexpected error type: {e}")
|
||||
|
||||
assert data_not_found_error, "Delete should fail for non-existent data_id"
|
||||
|
||||
# Test 6: Test delete with non-existent dataset_id
|
||||
print("\n📝 Test 6: Testing delete endpoint with non-existent dataset_id")
|
||||
|
||||
non_existent_dataset_id = uuid4()
|
||||
dataset_not_found_error = False
|
||||
try:
|
||||
await cognee.delete(
|
||||
data_id=data_to_delete_from_test_user,
|
||||
dataset_id=non_existent_dataset_id,
|
||||
user=test_user,
|
||||
)
|
||||
except (DatasetNotFoundError, PermissionDeniedError):
|
||||
dataset_not_found_error = True
|
||||
print("✅ Delete correctly failed for non-existent dataset_id")
|
||||
except Exception as e:
|
||||
print(f"❌ Unexpected error type: {e}")
|
||||
|
||||
assert dataset_not_found_error, "Delete should fail for non-existent dataset_id"
|
||||
|
||||
# Test 7: Test delete with data that doesn't belong to the dataset
|
||||
print("\n📝 Test 7: Testing delete endpoint with data not in specified dataset")
|
||||
|
||||
# Add more data to create a scenario where data exists but not in the specified dataset
|
||||
await cognee.add([text_1], dataset_name="another_dataset", user=default_user)
|
||||
await cognee.cognify(["another_dataset"], user=default_user)
|
||||
|
||||
another_datasets = await get_datasets_by_name(["another_dataset"], default_user.id)
|
||||
another_dataset = another_datasets[0]
|
||||
|
||||
data_not_in_dataset_error = False
|
||||
try:
|
||||
# Try to delete data from test_user's dataset using default_user's data_id
|
||||
await cognee.delete(
|
||||
data_id=data_to_delete_from_test_user, # This data belongs to test_user's dataset
|
||||
dataset_id=another_dataset.id, # But we're specifying default_user's other dataset
|
||||
user=default_user,
|
||||
)
|
||||
except DocumentNotFoundError:
|
||||
data_not_in_dataset_error = True
|
||||
print("✅ Delete correctly failed for data not in specified dataset")
|
||||
except Exception as e:
|
||||
print(f"❌ Unexpected error type: {e}")
|
||||
|
||||
assert data_not_in_dataset_error, "Delete should fail when data doesn't belong to dataset"
|
||||
|
||||
# Test 8: Test permission granting and delete
|
||||
print("\n📝 Test 8: Testing delete after granting permissions")
|
||||
|
||||
# Give default_user delete permission on test_user's dataset
|
||||
await authorized_give_permission_on_datasets(
|
||||
default_user.id,
|
||||
[test_dataset.id],
|
||||
"delete",
|
||||
test_user.id,
|
||||
)
|
||||
|
||||
try:
|
||||
result = await cognee.delete(
|
||||
data_id=data_to_delete_from_test_user,
|
||||
dataset_id=test_dataset.id,
|
||||
user=default_user, # Now should work with granted permission
|
||||
)
|
||||
print("✅ Delete successful after granting permission", result)
|
||||
assert result["status"] == "success", "Delete should succeed after granting permission"
|
||||
except Exception as e:
|
||||
print(f"❌ Unexpected error after granting permission: {e}")
|
||||
raise
|
||||
|
||||
# Test 9: Verify graph database cleanup
|
||||
print("\n📝 Test 9: Verifying comprehensive deletion (graph, vector, relational)")
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
|
||||
# We should still have some nodes/edges from the remaining data, but fewer than before
|
||||
print(f"✅ Graph database state after deletions - Nodes: {len(nodes)}, Edges: {len(edges)}")
|
||||
|
||||
# Test 10: Verify isolation user's data remains untouched
|
||||
print("\n📝 Test 10: Verifying isolation user's data remains intact")
|
||||
|
||||
try:
|
||||
# Get isolation user's data after all deletions
|
||||
isolation_dataset_data_after = await get_dataset_data(dataset_id_3)
|
||||
|
||||
print(
|
||||
f"📊 Isolation user's dataset still contains {len(isolation_dataset_data_after)} data items"
|
||||
)
|
||||
|
||||
# Verify data count is unchanged
|
||||
assert len(isolation_dataset_data_after) == len(dataset_data_3), (
|
||||
f"Isolation user's data count changed! Expected {len(dataset_data_3)}, got {len(isolation_dataset_data_after)}"
|
||||
)
|
||||
|
||||
# Verify specific data items are still there
|
||||
original_data_ids = {str(data.id) for data in dataset_data_3}
|
||||
remaining_data_ids = {str(data.id) for data in isolation_dataset_data_after}
|
||||
|
||||
assert original_data_ids == remaining_data_ids, "Isolation user's data IDs have changed!"
|
||||
|
||||
# Try to search isolation user's data to ensure it's still accessible
|
||||
isolation_search_results = await cognee.search(
|
||||
"Google technology company", user=isolation_user
|
||||
)
|
||||
assert len(isolation_search_results) > 0, "Isolation user's data should still be searchable"
|
||||
|
||||
print("✅ Isolation user's data completely unaffected by other users' deletions")
|
||||
print(f" - Data count unchanged: {len(isolation_dataset_data_after)} items")
|
||||
print(" - All original data IDs preserved")
|
||||
print(f" - Data still searchable: {len(isolation_search_results)} results")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error verifying isolation user's data: {e}")
|
||||
raise
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("🎉 All tests passed! Delete by ID endpoint working correctly.")
|
||||
print("=" * 60)
|
||||
|
||||
print("""
|
||||
📋 SUMMARY OF TESTED FUNCTIONALITY:
|
||||
✅ Delete endpoint accepts data_id and dataset_id parameters
|
||||
✅ Permission checking works for delete operations
|
||||
✅ Proper error handling for non-existent data/datasets
|
||||
✅ Data ownership validation (data must belong to specified dataset)
|
||||
✅ Permission granting and revocation works correctly
|
||||
✅ Comprehensive deletion across all databases (graph, vector, relational)
|
||||
✅ Dataset data endpoint now checks read permissions properly
|
||||
✅ Data isolation: Other users' data remains completely unaffected by deletions
|
||||
""")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -4,6 +4,7 @@ from typing import List
|
|||
from uuid import uuid4
|
||||
|
||||
import cognee
|
||||
from cognee.api.v1.datasets import datasets
|
||||
from cognee.infrastructure.engine import DataPoint
|
||||
from cognee.modules.data.models import Data, Dataset
|
||||
from cognee.modules.engine.operations.setup import setup
|
||||
|
|
@ -90,12 +91,12 @@ async def main():
|
|||
"Nodes and edges are not correctly added to the graph."
|
||||
)
|
||||
|
||||
await delete_data_nodes_and_edges(dataset.id, data1.id) # type: ignore
|
||||
await datasets.delete_data(dataset.id, data1.id, user.id) # type: ignore
|
||||
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) == 2 and len(edges) == 1, "Nodes and edges are not deleted properly."
|
||||
|
||||
await delete_data_nodes_and_edges(dataset.id, data2.id) # type: ignore
|
||||
await datasets.delete_data(dataset.id, data2.id, user.id) # type: ignore
|
||||
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Nodes and edges are not deleted."
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import os
|
|||
import pathlib
|
||||
|
||||
import cognee
|
||||
from cognee.api.v1.datasets import datasets
|
||||
from cognee.api.v1.visualize.visualize import visualize_graph
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
|
@ -10,6 +11,7 @@ from cognee.modules.engine.operations.setup import setup
|
|||
from cognee.modules.graph.methods import (
|
||||
delete_data_nodes_and_edges,
|
||||
)
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
|
@ -58,9 +60,8 @@ async def main():
|
|||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) >= 12 and len(edges) >= 18, "Nodes and edges are not deleted."
|
||||
|
||||
await delete_data_nodes_and_edges(dataset_id, added_data.id) # type: ignore
|
||||
|
||||
await delete_data(added_data)
|
||||
user = await get_default_user()
|
||||
await datasets.delete_data(dataset_id, added_data.id, user.id) # type: ignore
|
||||
|
||||
file_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_after_delete.html"
|
||||
|
|
|
|||
|
|
@ -1,85 +0,0 @@
|
|||
import os
|
||||
import shutil
|
||||
import cognee
|
||||
import pathlib
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.data.methods import get_dataset_data
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def main():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
pdf_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
|
||||
)
|
||||
|
||||
txt_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
|
||||
)
|
||||
|
||||
audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
|
||||
|
||||
image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
|
||||
|
||||
unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")
|
||||
|
||||
text_document_as_literal = """
|
||||
1. Audi
|
||||
Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.
|
||||
|
||||
2. BMW
|
||||
BMW, short for Bayerische Motoren Werke, is celebrated for its focus on performance and driving pleasure. The company's vehicles are designed to provide a dynamic and engaging driving experience, and their slogan, "The Ultimate Driving Machine," reflects that commitment. BMW produces a variety of cars that combine luxury with sporty performance.
|
||||
|
||||
3. Mercedes-Benz
|
||||
Mercedes-Benz is synonymous with luxury and quality. With a history dating back to the early 20th century, the brand is known for its elegant designs, innovative safety features, and high-quality engineering. Mercedes-Benz manufactures not only luxury sedans but also SUVs, sports cars, and commercial vehicles, catering to a wide range of needs.
|
||||
|
||||
4. Porsche
|
||||
Porsche is a name that stands for high-performance sports cars. Founded in 1931, the brand has become famous for models like the iconic Porsche 911. Porsche cars are celebrated for their speed, precision, and distinctive design, appealing to car enthusiasts who value both performance and style.
|
||||
|
||||
5. Volkswagen
|
||||
Volkswagen, which means "people's car" in German, was established with the idea of making affordable and reliable vehicles accessible to everyone. Over the years, Volkswagen has produced several iconic models, such as the Beetle and the Golf. Today, it remains one of the largest car manufacturers in the world, offering a wide range of vehicles that balance practicality with quality.
|
||||
|
||||
Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
|
||||
"""
|
||||
|
||||
# Add documents and get dataset information
|
||||
add_result = await cognee.add(
|
||||
[
|
||||
pdf_document,
|
||||
txt_document,
|
||||
text_document_as_literal,
|
||||
unstructured_document,
|
||||
audio_document,
|
||||
image_document,
|
||||
]
|
||||
)
|
||||
dataset_id = add_result.dataset_id
|
||||
|
||||
await cognee.cognify()
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."
|
||||
|
||||
# Get the data IDs from the dataset
|
||||
dataset_data = await get_dataset_data(dataset_id)
|
||||
assert len(dataset_data) > 0, "Dataset should contain data"
|
||||
|
||||
# Delete each document using its ID
|
||||
for data_item in dataset_data:
|
||||
await cognee.delete(data_item.id, dataset_id, mode="hard")
|
||||
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with hard delete."
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
import os
|
||||
import shutil
|
||||
import cognee
|
||||
import pathlib
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.data.methods import get_dataset_data
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def main():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
pdf_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
|
||||
)
|
||||
|
||||
txt_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
|
||||
)
|
||||
|
||||
audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
|
||||
|
||||
image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
|
||||
|
||||
unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")
|
||||
|
||||
text_document_as_literal = """
|
||||
1. Audi
|
||||
Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.
|
||||
|
||||
2. BMW
|
||||
BMW, short for Bayerische Motoren Werke, is celebrated for its focus on performance and driving pleasure. The company's vehicles are designed to provide a dynamic and engaging driving experience, and their slogan, "The Ultimate Driving Machine," reflects that commitment. BMW produces a variety of cars that combine luxury with sporty performance.
|
||||
|
||||
3. Mercedes-Benz
|
||||
Mercedes-Benz is synonymous with luxury and quality. With a history dating back to the early 20th century, the brand is known for its elegant designs, innovative safety features, and high-quality engineering. Mercedes-Benz manufactures not only luxury sedans but also SUVs, sports cars, and commercial vehicles, catering to a wide range of needs.
|
||||
|
||||
4. Porsche
|
||||
Porsche is a name that stands for high-performance sports cars. Founded in 1931, the brand has become famous for models like the iconic Porsche 911. Porsche cars are celebrated for their speed, precision, and distinctive design, appealing to car enthusiasts who value both performance and style.
|
||||
|
||||
5. Volkswagen
|
||||
Volkswagen, which means "people's car" in German, was established with the idea of making affordable and reliable vehicles accessible to everyone. Over the years, Volkswagen has produced several iconic models, such as the Beetle and the Golf. Today, it remains one of the largest car manufacturers in the world, offering a wide range of vehicles that balance practicality with quality.
|
||||
|
||||
Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
|
||||
"""
|
||||
|
||||
# Add documents and get dataset information
|
||||
add_result = await cognee.add(
|
||||
[
|
||||
pdf_document,
|
||||
txt_document,
|
||||
text_document_as_literal,
|
||||
unstructured_document,
|
||||
audio_document,
|
||||
image_document,
|
||||
]
|
||||
)
|
||||
dataset_id = add_result.dataset_id
|
||||
|
||||
await cognee.cognify()
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."
|
||||
|
||||
# Get the data IDs from the dataset
|
||||
dataset_data = await get_dataset_data(dataset_id)
|
||||
assert len(dataset_data) > 0, "Dataset should contain data"
|
||||
|
||||
# Delete each document using its ID
|
||||
for data_item in dataset_data:
|
||||
await cognee.delete(data_item.id, dataset_id, mode="soft")
|
||||
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with soft delete."
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -188,8 +188,10 @@ async def main():
|
|||
test_user_dataset_data = await get_dataset_data(test_user_dataset_id)
|
||||
text_data_id = test_user_dataset_data[0].id
|
||||
|
||||
await cognee.delete(
|
||||
data_id=text_data_id, dataset_id=test_user_dataset_id, user=default_user
|
||||
await cognee.datasets.delete_data(
|
||||
dataset_id=test_user_dataset_id,
|
||||
data_id=text_data_id,
|
||||
user_id=default_user.id,
|
||||
)
|
||||
except PermissionDeniedError:
|
||||
delete_error = True
|
||||
|
|
@ -201,7 +203,9 @@ async def main():
|
|||
test_user_dataset_data = await get_dataset_data(test_user_dataset_id)
|
||||
text_data_id = test_user_dataset_data[0].id
|
||||
|
||||
await cognee.delete(data_id=text_data_id, dataset_id=test_user_dataset_id, user=test_user)
|
||||
await cognee.datasets.delete_data(
|
||||
dataset_id=test_user_dataset_id, data_id=text_data_id, user_id=test_user.id
|
||||
)
|
||||
|
||||
# Actually give permission to default_user to delete data for test_users dataset
|
||||
await authorized_give_permission_on_datasets(
|
||||
|
|
@ -216,8 +220,10 @@ async def main():
|
|||
test_user_dataset_data = await get_dataset_data(test_user_dataset_id)
|
||||
explanation_file_data_id = test_user_dataset_data[0].id
|
||||
|
||||
await cognee.delete(
|
||||
data_id=explanation_file_data_id, dataset_id=test_user_dataset_id, user=default_user
|
||||
await cognee.datasets.delete_data(
|
||||
dataset_id=test_user_dataset_id,
|
||||
data_id=explanation_file_data_id,
|
||||
user_id=default_user.id,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from cognee import prune
|
|||
# from cognee import visualize_graph
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.low_level import setup, DataPoint
|
||||
from cognee.modules.data.models import Data, Dataset
|
||||
from cognee.modules.data.models import Dataset
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.pipelines import run_tasks, Task
|
||||
from cognee.tasks.storage import add_data_points
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue