fix: use new delete in API and cli

This commit is contained in:
Boris Arzentar 2025-10-13 17:09:42 +02:00
parent c438be70c3
commit dc64dd5ffa
No known key found for this signature in database
GPG key ID: D5CC274C784807B7
33 changed files with 253 additions and 988 deletions

View file

@ -196,7 +196,7 @@ class CogneeClient:
)
return results
async def delete(self, data_id: UUID, dataset_id: UUID, mode: str = "soft") -> Dict[str, Any]:
async def delete(self, data_id: UUID, dataset_id: UUID) -> Dict[str, Any]:
"""
Delete data from a dataset.
@ -206,8 +206,6 @@ class CogneeClient:
ID of the data to delete
dataset_id : UUID
ID of the dataset containing the data
mode : str
Deletion mode ("soft" or "hard")
Returns
-------
@ -216,12 +214,9 @@ class CogneeClient:
"""
if self.use_api:
# API mode: Make HTTP request
endpoint = f"{self.api_url}/api/v1/delete"
params = {"data_id": str(data_id), "dataset_id": str(dataset_id), "mode": mode}
endpoint = f"{self.api_url}/api/v1/datasets/{str(dataset_id)}/data/{str(data_id)}"
response = await self.client.delete(
endpoint, params=params, headers=self._get_headers()
)
response = await self.client.delete(endpoint, headers=self._get_headers())
response.raise_for_status()
return response.json()
else:
@ -230,10 +225,11 @@ class CogneeClient:
with redirect_stdout(sys.stderr):
user = await get_default_user()
result = await self.cognee.delete(
data_id=data_id, dataset_id=dataset_id, mode=mode, user=user
await self.cognee.datasets.delete_data(
dataset_id=dataset_id,
data_id=data_id,
user_id=user.id,
)
return result
async def prune_data(self) -> Dict[str, Any]:
"""

View file

@ -16,7 +16,6 @@ from cognee.shared.logging_utils import setup_logging
logger = setup_logging()
from .api.v1.add import add
from .api.v1.delete import delete
from .api.v1.cognify import cognify
from .modules.memify import memify
from .api.v1.update import update

View file

@ -25,7 +25,6 @@ from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_
from cognee.api.v1.search.routers import get_search_router
from cognee.api.v1.memify.routers import get_memify_router
from cognee.api.v1.add.routers import get_add_router
from cognee.api.v1.delete.routers import get_delete_router
from cognee.api.v1.responses.routers import get_responses_router
from cognee.api.v1.sync.routers import get_sync_router
from cognee.api.v1.update.routers import get_update_router
@ -262,8 +261,6 @@ app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["sett
app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])
app.include_router(get_update_router(), prefix="/api/v1/update", tags=["update"])
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])

View file

@ -2,6 +2,9 @@ import os
import pathlib
import asyncio
from typing import Optional
from uuid import UUID, uuid4
from pydantic import BaseModel
from cognee.shared.logging_utils import get_logger, setup_logging
from cognee.modules.observability.get_observe import get_observe
@ -83,15 +86,19 @@ async def run_code_graph_pipeline(
async with db_engine.get_async_session() as session:
dataset = await create_dataset(dataset_name, user, session)
class RepoData(BaseModel):
id: UUID
repo_path: str
data = RepoData(id=uuid4(), repo_path=repo_path)
if include_docs:
non_code_pipeline_run = run_tasks(
non_code_tasks, dataset, repo_path, user, "cognify_pipeline"
)
non_code_pipeline_run = run_tasks(non_code_tasks, dataset, data, user, "cognify_pipeline")
async for run_status in non_code_pipeline_run:
yield run_status
async for run_status in run_tasks(
tasks, dataset, repo_path, user, "cognify_code_pipeline", incremental_loading=False
tasks, dataset, data, user, "cognify_code_pipeline", incremental_loading=False
):
yield run_status

View file

@ -0,0 +1 @@
from .datasets import datasets

View file

@ -1,4 +1,8 @@
from typing import Optional
from uuid import UUID
from cognee.modules.data.exceptions.exceptions import UnauthorizedDataAccessError
from cognee.modules.data.methods import get_datasets
from cognee.modules.graph.methods import delete_data_nodes_and_edges, delete_dataset_nodes_and_edges
from cognee.modules.users.methods import get_default_user
from cognee.modules.ingestion import discover_directory_datasets
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
@ -31,10 +35,53 @@ class datasets:
return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")
@staticmethod
async def delete_dataset(dataset_id: UUID, user_id: Optional[UUID] = None):
    """Delete a dataset along with its graph nodes and edges.

    Parameters
    ----------
    dataset_id : UUID
        ID of the dataset to delete.
    user_id : Optional[UUID]
        Owner performing the deletion; defaults to the default user.

    Raises
    ------
    UnauthorizedDataAccessError
        If the dataset does not exist or is not accessible to the user.
    """
    # Imported lazily, presumably to avoid a circular import — TODO confirm.
    from cognee.modules.data.methods import get_dataset, delete_dataset

    if not user_id:
        user = await get_default_user()
        user_id = user.id

    # BUG FIX: previously called get_dataset(user.id, ...), but `user` is
    # unbound when user_id is passed in explicitly (NameError).
    dataset = await get_dataset(user_id, dataset_id)

    if not dataset:
        raise UnauthorizedDataAccessError(f"Dataset {dataset_id} not accessible.")

    # Remove graph/vector artifacts first, then the relational record.
    await delete_dataset_nodes_and_edges(dataset_id)

    return await delete_dataset(dataset)
@staticmethod
async def delete_data(dataset_id: UUID, data_id: UUID, user_id: Optional[UUID] = None):
    """Delete a single data item from a dataset.

    Parameters
    ----------
    dataset_id : UUID
        Dataset the data item belongs to.
    data_id : UUID
        ID of the data item to delete.
    user_id : Optional[UUID]
        User performing the deletion; defaults to the default user.

    Raises
    ------
    UnauthorizedDataAccessError
        If the data exists but is not linked to the given dataset.
    """
    from cognee.modules.data.methods import delete_data, get_data

    if not user_id:
        user = await get_default_user()
        user_id = user.id

    data = await get_data(user_id, data_id)

    if not data:
        # If data is not found in the system, user is using a custom graph
        # model; only graph/vector artifacts need removal.
        await delete_data_nodes_and_edges(dataset_id, data_id)
        return

    # `data` is known truthy here, so only the dataset-membership check is
    # needed (the original repeated `not data` redundantly).
    if not any(dataset.id == dataset_id for dataset in data.datasets):
        raise UnauthorizedDataAccessError(f"Data {data_id} not accessible.")

    await delete_data_nodes_and_edges(dataset_id, data.id)
    await delete_data(data)
@staticmethod
async def delete_all(user_id: Optional[UUID] = None):
    """Delete every dataset that belongs to the given user.

    When ``user_id`` is omitted, the default user's datasets are deleted.
    """
    if not user_id:
        user = await get_default_user()
        user_id = user.id

    for dataset in await get_datasets(user_id):
        await datasets.delete_dataset(dataset.id, user_id)

View file

@ -9,12 +9,13 @@ from fastapi.encoders import jsonable_encoder
from fastapi import HTTPException, Query, Depends
from fastapi.responses import JSONResponse, FileResponse
from cognee import datasets
from cognee.api.DTO import InDTO, OutDTO
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.methods import create_dataset, get_datasets_by_name
from cognee.shared.logging_utils import get_logger
from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError
from cognee.api.v1.exceptions import DataNotFoundError
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.users.permissions.methods import (
@ -175,6 +176,20 @@ def get_datasets_router() -> APIRouter:
detail=f"Error creating dataset: {str(error)}",
) from error
@router.delete("")
async def delete_all(user: User = Depends(get_authenticated_user)):
    """
    Delete all of the authenticated user's data.

    This endpoint permanently deletes all datasets that the user created and
    all their associated data.
    The user must have delete permissions on the datasets to perform this operation.

    ## Response
    No content returned on successful deletion.
    If no datasets exist for the user, nothing happens.
    """
    # Delegates the per-dataset permission checks and cleanup to the
    # datasets API.
    await datasets.delete_all(user.id)
@router.delete(
"/{dataset_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}}
)
@ -204,14 +219,7 @@ def get_datasets_router() -> APIRouter:
},
)
from cognee.modules.data.methods import get_dataset, delete_dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
await delete_dataset(dataset)
await datasets.delete_dataset(dataset_id, user.id)
@router.delete(
"/{dataset_id}/data/{data_id}",
@ -249,21 +257,7 @@ def get_datasets_router() -> APIRouter:
},
)
from cognee.modules.data.methods import get_data, delete_data
from cognee.modules.data.methods import get_dataset
# Check if user has permission to access dataset and data by trying to get the dataset
dataset = await get_dataset(user.id, dataset_id)
if dataset is None:
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
data = await get_data(user.id, data_id)
if data is None:
raise DataNotFoundError(message=f"Data ({str(data_id)}) not found.")
await delete_data(data)
await datasets.delete_data(dataset_id, data_id, user.id)
@router.get("/{dataset_id}/graph", response_model=GraphDTO)
async def get_dataset_graph(dataset_id: UUID, user: User = Depends(get_authenticated_user)):

View file

@ -1 +0,0 @@
from .delete import delete

View file

@ -1,269 +0,0 @@
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.sql import delete as sql_delete
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.users.models import User
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.shared.logging_utils import get_logger
from cognee.modules.data.models import Data, DatasetData, Dataset
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
from cognee.modules.users.methods import get_default_user
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.context_global_variables import set_database_global_context_variables
from cognee.api.v1.exceptions import (
DocumentNotFoundError,
DatasetNotFoundError,
DocumentSubgraphNotFoundError,
)
logger = get_logger()
async def delete(
    data_id: UUID,
    dataset_id: UUID,
    mode: str = "soft",
    user: User = None,
):
    """Delete data by its ID from the specified dataset.

    Args:
        data_id: The UUID of the data to delete
        dataset_id: The UUID of the dataset containing the data
        mode: "soft" (default) or "hard" - hard mode also deletes degree-one entity nodes
        user: User doing the operation, if none default user will be used.

    Returns:
        Dict with deletion results

    Raises:
        DocumentNotFoundError: If data is not found
        DatasetNotFoundError: If dataset is not found
        PermissionDeniedError: If user doesn't have delete permission on dataset
    """
    if user is None:
        user = await get_default_user()

    # Verify user has delete permission on the dataset
    dataset_list = await get_authorized_existing_datasets([dataset_id], "delete", user)
    if not dataset_list:
        raise DatasetNotFoundError(f"Dataset not found or access denied: {dataset_id}")
    dataset = dataset_list[0]

    # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
    await set_database_global_context_variables(dataset.id, dataset.owner_id)

    # Get the data record and verify it exists and belongs to the dataset
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        # Check if data exists
        data_point = (
            await session.execute(select(Data).filter(Data.id == data_id))
        ).scalar_one_or_none()
        if data_point is None:
            raise DocumentNotFoundError(f"Data not found with ID: {data_id}")

        # Check if data belongs to the specified dataset
        dataset_data_link = (
            await session.execute(
                select(DatasetData).filter(
                    DatasetData.data_id == data_id, DatasetData.dataset_id == dataset_id
                )
            )
        ).scalar_one_or_none()
        if dataset_data_link is None:
            raise DocumentNotFoundError(f"Data {data_id} not found in dataset {dataset_id}")

        # Get the content hash for deletion
        # NOTE(review): this rebinds the `data_id` parameter from UUID to str;
        # downstream delete_single_document expects the string form.
        data_id = str(data_point.id)

    # Use the existing comprehensive deletion logic
    return await delete_single_document(data_id, dataset.id, mode)
async def delete_single_document(data_id: str, dataset_id: UUID = None, mode: str = "soft"):
    """Delete a single document by its content hash.

    Removes the document's subgraph from the graph store, its embeddings from
    every vector collection, and finally its relational records; the data row
    itself is only removed when it belongs to no other dataset.
    """
    # Delete from graph database
    deletion_result = await delete_document_subgraph(data_id, mode)
    logger.info(f"Deletion result: {deletion_result}")

    # Get the deleted node IDs and convert to UUID
    deleted_node_ids = []
    for node_id in deletion_result["deleted_node_ids"]:
        try:
            # Handle both string and UUID formats
            if isinstance(node_id, str):
                # Remove any hyphens if present
                node_id = node_id.replace("-", "")
                deleted_node_ids.append(UUID(node_id))
            else:
                deleted_node_ids.append(node_id)
        except Exception as e:
            logger.error(f"Error converting node ID {node_id} to UUID: {e}")
            continue

    # Delete from vector database
    vector_engine = get_vector_engine()

    # Determine vector collections dynamically from every DataPoint subclass's
    # declared index_fields ("<ClassName>_<field>").
    subclasses = get_all_subclasses(DataPoint)
    vector_collections = []
    for subclass in subclasses:
        index_fields = subclass.model_fields["metadata"].default.get("index_fields", [])
        for field_name in index_fields:
            vector_collections.append(f"{subclass.__name__}_{field_name}")

    # If no collections found, use default collections
    if not vector_collections:
        vector_collections = [
            "DocumentChunk_text",
            "EdgeType_relationship_name",
            "EntityType_name",
            "Entity_name",
            "TextDocument_name",
            "TextSummary_text",
        ]

    # Delete records from each vector collection that exists
    for collection in vector_collections:
        if await vector_engine.has_collection(collection):
            await vector_engine.delete_data_points(
                collection, [str(node_id) for node_id in deleted_node_ids]
            )

    # Delete from relational database
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        # Update graph_relationship_ledger with deleted_at timestamps
        from sqlalchemy import update, and_, or_
        from datetime import datetime
        from cognee.modules.data.models.graph_relationship_ledger import GraphRelationshipLedger

        # NOTE(review): datetime.now() is naive while the ledger column is
        # timezone-aware — confirm intended; datetime.now(timezone.utc) would match.
        update_stmt = (
            update(GraphRelationshipLedger)
            .where(
                or_(
                    GraphRelationshipLedger.source_node_id.in_(deleted_node_ids),
                    GraphRelationshipLedger.destination_node_id.in_(deleted_node_ids),
                )
            )
            .values(deleted_at=datetime.now())
        )
        await session.execute(update_stmt)

        # Get the data point
        data_point = (
            await session.execute(select(Data).filter(Data.id == UUID(data_id)))
        ).scalar_one_or_none()
        if data_point is None:
            raise DocumentNotFoundError(
                f"Document not found in relational DB with data id: {data_id}"
            )
        doc_id = data_point.id

        # Get the dataset
        dataset = (
            await session.execute(select(Dataset).filter(Dataset.id == dataset_id))
        ).scalar_one_or_none()
        if dataset is None:
            raise DatasetNotFoundError(f"Dataset not found: {dataset_id}")

        # Delete from dataset_data table
        dataset_delete_stmt = sql_delete(DatasetData).where(
            DatasetData.data_id == doc_id, DatasetData.dataset_id == dataset.id
        )
        await session.execute(dataset_delete_stmt)

        # Check if the document is in any other datasets
        # NOTE(review): scalar_one_or_none() raises MultipleResultsFound when
        # the document remains in two or more other datasets — confirm callers
        # never hit that case, or use .first() instead.
        remaining_datasets = (
            await session.execute(select(DatasetData).filter(DatasetData.data_id == doc_id))
        ).scalar_one_or_none()

        # If the document is not in any other datasets, delete it from the data table
        if remaining_datasets is None:
            data_delete_stmt = sql_delete(Data).where(Data.id == doc_id)
            await session.execute(data_delete_stmt)

        await session.commit()

    return {
        "status": "success",
        "message": "Document deleted from both graph and relational databases",
        "graph_deletions": deletion_result["deleted_counts"],
        "data_id": data_id,
        "dataset": dataset_id,
        "deleted_node_ids": [
            str(node_id) for node_id in deleted_node_ids
        ],  # Convert back to strings for response
    }
async def delete_document_subgraph(document_id: str, mode: str = "soft"):
    """Delete a document and all its related nodes in the correct order.

    Returns a dict with per-category deletion counts and the ids of every
    node removed from the graph store.

    Raises:
        DocumentSubgraphNotFoundError: if no subgraph exists for the id.
    """
    graph_db = await get_graph_engine()
    subgraph = await graph_db.get_document_subgraph(document_id)
    if not subgraph:
        raise DocumentSubgraphNotFoundError(f"Document not found with id: {document_id}")

    # Delete in the correct order to maintain graph integrity
    deletion_order = [
        ("orphan_entities", "orphaned entities"),
        ("orphan_types", "orphaned entity types"),
        (
            "made_from_nodes",
            "made_from nodes",
        ),  # Move before chunks since summaries are connected to chunks
        ("chunks", "document chunks"),
        ("document", "document"),
    ]

    deleted_counts = {}
    deleted_node_ids = []
    for key, description in deletion_order:
        nodes = subgraph[key]
        if nodes:
            for node in nodes:
                node_id = node["id"]
                await graph_db.delete_node(node_id)
                deleted_node_ids.append(node_id)
            deleted_counts[description] = len(nodes)

    # If hard mode, also delete degree-one nodes
    if mode == "hard":
        # Get and delete degree one entity nodes
        degree_one_entity_nodes = await graph_db.get_degree_one_nodes("Entity")
        for node in degree_one_entity_nodes:
            await graph_db.delete_node(node["id"])
            deleted_node_ids.append(node["id"])
            deleted_counts["degree_one_entities"] = deleted_counts.get("degree_one_entities", 0) + 1

        # Get and delete degree one entity types
        degree_one_entity_types = await graph_db.get_degree_one_nodes("EntityType")
        for node in degree_one_entity_types:
            await graph_db.delete_node(node["id"])
            deleted_node_ids.append(node["id"])
            deleted_counts["degree_one_types"] = deleted_counts.get("degree_one_types", 0) + 1

    return {
        "status": "success",
        "deleted_counts": deleted_counts,
        "document_id": document_id,
        "deleted_node_ids": deleted_node_ids,
    }

View file

@ -1 +0,0 @@
from .get_delete_router import get_delete_router

View file

@ -1,60 +0,0 @@
from fastapi import Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
from uuid import UUID
from cognee.shared.logging_utils import get_logger
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.shared.utils import send_telemetry
logger = get_logger()
def get_delete_router() -> APIRouter:
    """Build the /v1/delete router exposing a single DELETE endpoint."""
    router = APIRouter()

    @router.delete("", response_model=None)
    async def delete(
        data_id: UUID,
        dataset_id: UUID,
        mode: str = "soft",
        user: User = Depends(get_authenticated_user),
    ):
        """Delete data by its ID from the specified dataset.

        Args:
            data_id: The UUID of the data to delete
            dataset_id: The UUID of the dataset containing the data
            mode: "soft" (default) or "hard" - hard mode also deletes degree-one entity nodes
            user: Authenticated user

        Returns:
            JSON response indicating success or failure
        """
        send_telemetry(
            "Delete API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "DELETE /v1/delete",
                "dataset_id": str(dataset_id),
                "data_id": str(data_id),
            },
        )

        # Imported lazily, presumably to avoid a circular import — TODO confirm.
        from cognee.api.v1.delete import delete as cognee_delete

        try:
            result = await cognee_delete(
                data_id=data_id,
                dataset_id=dataset_id,
                mode=mode,
                user=user,
            )
            return result
        except Exception as error:
            # All failures are surfaced as 409 with the error text.
            logger.error(f"Error during deletion by data_id: {str(error)}")
            return JSONResponse(status_code=409, content={"error": str(error)})

    return router

View file

@ -2,9 +2,9 @@ from uuid import UUID
from typing import Union, BinaryIO, List, Optional
from cognee.modules.users.models import User
from cognee.api.v1.delete import delete
from cognee.api.v1.add import add
from cognee.api.v1.cognify import cognify
from cognee.api.v1.datasets import datasets
async def update(
@ -72,10 +72,10 @@ async def update(
- Processing status and any errors
- Execution timestamps and metadata
"""
await delete(
data_id=data_id,
await datasets.delete_data(
dataset_id=dataset_id,
user=user,
data_id=data_id,
user_id=user.id,
)
await add(

View file

@ -1,11 +1,11 @@
import argparse
import asyncio
from typing import Optional
from cognee.cli.reference import SupportsCliCommand
from cognee.cli import DEFAULT_DOCS_URL
import cognee.cli.echo as fmt
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
from cognee.modules.data.methods import get_datasets_by_name
from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
@ -38,8 +38,10 @@ Be careful with deletion operations as they are irreversible.
import cognee
# Validate arguments
if not any([args.dataset_name, args.user_id, args.all]):
fmt.error("Please specify what to delete: --dataset-name, --user-id, or --all")
if not any([args.dataset_name, args.dataset_id, args.data_id, args.user_id, args.all]):
fmt.error(
"Please specify what to delete: --dataset-name, --dataset-id, --data-id, --user-id, or --all"
)
return
# If --force is used, skip the preview and go straight to deletion
@ -93,12 +95,29 @@ Be careful with deletion operations as they are irreversible.
# Run the async delete function
async def run_delete():
    """Dispatch the requested deletion to the datasets API.

    Raises CliCommandInnerException wrapping any failure so the CLI can
    report it uniformly.
    """
    try:
        if args.all:
            await cognee.datasets.delete_all(user_id=args.user_id)
        elif args.dataset_id and args.data_id:
            # BUG FIX: this branch must be checked before the dataset-wide
            # one — previously `args.dataset_name or args.dataset_id`
            # matched first, so a dataset_id + data_id request deleted the
            # ENTIRE dataset instead of the single data item.
            await cognee.datasets.delete_data(args.dataset_id, args.data_id)
        elif args.dataset_name or args.dataset_id:
            dataset_id = args.dataset_id
            if args.dataset_name and not args.dataset_id:
                # Renamed from `datasets` to avoid shadowing cognee.datasets.
                found_datasets = await get_datasets_by_name(
                    args.dataset_name, user_id=args.user_id
                )
                if not found_datasets:
                    raise CliCommandException(
                        f"No dataset found for name '{args.dataset_name}'."
                    )
                dataset_id = found_datasets[0].id
            await cognee.datasets.delete_dataset(
                dataset_id=dataset_id, user_id=args.user_id
            )
    except Exception as e:
        raise CliCommandInnerException(f"Failed to delete: {str(e)}") from e

View file

@ -41,7 +41,7 @@ class Data(Base):
"Dataset",
secondary=DatasetData.__tablename__,
back_populates="data",
lazy="noload",
lazy="selectin",
cascade="all, delete",
)

View file

@ -1,40 +0,0 @@
from datetime import datetime, timezone
from uuid import uuid5, NAMESPACE_OID
from sqlalchemy import UUID, Column, DateTime, String, Index
from cognee.infrastructure.databases.relational import Base
class GraphRelationshipLedger(Base):
    """Ledger of graph relationships (edges) with soft-delete timestamps."""

    __tablename__ = "graph_relationship_ledger"

    # NOTE(review): uuid5 of a wall-clock timestamp can collide when two rows
    # are created in the same timestamp tick — consider uuid4 instead.
    id = Column(
        UUID,
        primary_key=True,
        default=lambda: uuid5(NAMESPACE_OID, f"{datetime.now(timezone.utc).timestamp()}"),
    )
    source_node_id = Column(UUID, nullable=False)
    destination_node_id = Column(UUID, nullable=False)
    creator_function = Column(String, nullable=False)
    node_label = Column(String, nullable=True)
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    # NULL while the relationship is live; set on soft delete.
    deleted_at = Column(DateTime(timezone=True), nullable=True)
    user_id = Column(UUID, nullable=True)

    # Create indexes
    __table_args__ = (
        Index("idx_graph_relationship_id", "id"),
        Index("idx_graph_relationship_ledger_source_node_id", "source_node_id"),
        Index("idx_graph_relationship_ledger_destination_node_id", "destination_node_id"),
    )

    def to_json(self) -> dict:
        """Serialize the row to JSON-safe primitives.

        BUG FIX: previously read `self.parent_id` / `self.child_id`, which do
        not exist on this model, so every call raised AttributeError.
        """
        return {
            "id": str(self.id),
            "source_node_id": str(self.source_node_id),
            "destination_node_id": str(self.destination_node_id),
            "creator_function": self.creator_function,
            "created_at": self.created_at.isoformat(),
            "deleted_at": self.deleted_at.isoformat() if self.deleted_at else None,
            "user_id": str(self.user_id),
        }

View file

@ -1,8 +1,16 @@
from .get_formatted_graph_data import get_formatted_graph_data
from .upsert_edges import upsert_edges
from .upsert_nodes import upsert_nodes
from .get_data_related_nodes import get_data_related_nodes
from .get_data_related_edges import get_data_related_edges
from .delete_data_related_nodes import delete_data_related_nodes
from .delete_data_related_edges import delete_data_related_edges
from .get_data_related_edges import get_data_related_edges
from .delete_data_nodes_and_edges import delete_data_nodes_and_edges
from .get_dataset_related_nodes import get_dataset_related_nodes
from .get_dataset_related_edges import get_dataset_related_edges
from .delete_dataset_related_nodes import delete_dataset_related_nodes
from .delete_dataset_related_edges import delete_dataset_related_edges
from .delete_dataset_nodes_and_edges import delete_dataset_nodes_and_edges

View file

@ -0,0 +1,43 @@
from uuid import UUID
from typing import Dict, List
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
from cognee.modules.engine.utils import generate_edge_id
from cognee.modules.graph.methods import (
delete_dataset_related_edges,
delete_dataset_related_nodes,
get_dataset_related_nodes,
get_dataset_related_edges,
)
async def delete_dataset_nodes_and_edges(dataset_id: UUID) -> None:
    """Remove all graph and vector artifacts that belong to a dataset.

    Steps: delete the dataset's nodes from the graph store, purge the
    corresponding data points from every affected vector collection, purge
    the dataset's edge embeddings, then drop the relational Node/Edge rows.
    """
    affected_nodes = await get_dataset_related_nodes(dataset_id)

    graph_engine = await get_graph_engine()
    await graph_engine.delete_nodes([str(node.slug) for node in affected_nodes])

    # Group nodes by the vector collection ("<type>_<field>") that indexes them.
    affected_vector_collections: Dict[str, List] = {}
    for node in affected_nodes:
        for indexed_field in node.indexed_fields:
            collection_name = f"{node.type}_{indexed_field}"
            affected_vector_collections.setdefault(collection_name, []).append(node)

    vector_engine = get_vector_engine()
    # Distinct loop variable: the original rebound `affected_nodes` here,
    # shadowing the node list above.
    for collection_name, collection_nodes in affected_vector_collections.items():
        await vector_engine.delete_data_points(
            collection_name, [node.id for node in collection_nodes]
        )

    affected_relationships = await get_dataset_related_edges(dataset_id)
    await vector_engine.delete_data_points(
        "EdgeType_relationship_name",
        [generate_edge_id(edge.relationship_name) for edge in affected_relationships],
    )

    await delete_dataset_related_nodes(dataset_id)
    await delete_dataset_related_edges(dataset_id)

View file

@ -0,0 +1,13 @@
from uuid import UUID
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from cognee.infrastructure.databases.relational import with_async_session
from cognee.modules.graph.models import Edge
@with_async_session
async def delete_dataset_related_edges(dataset_id: UUID, session: AsyncSession):
    """Delete every Edge row recorded for the given dataset.

    The previous version fetched all matching rows (misnamed `nodes`) and
    deleted them by id; a single DELETE with the same predicate removes the
    O(n) materialization and extra round-trip without changing which rows
    are deleted.
    """
    await session.execute(delete(Edge).where(Edge.dataset_id == dataset_id))

View file

@ -0,0 +1,13 @@
from uuid import UUID
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession
from cognee.infrastructure.databases.relational import with_async_session
from cognee.modules.graph.models import Node
@with_async_session
async def delete_dataset_related_nodes(dataset_id: UUID, session: AsyncSession):
    """Delete every Node row recorded for the given dataset.

    The previous version fetched all matching rows and deleted them by id;
    a single DELETE with the same predicate avoids the needless fetch while
    deleting exactly the same rows.
    """
    await session.execute(delete(Node).where(Node.dataset_id == dataset_id))

View file

@ -0,0 +1,15 @@
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from cognee.infrastructure.databases.relational import with_async_session
from cognee.modules.graph.models import Edge
@with_async_session
async def get_dataset_related_edges(dataset_id: UUID, session: AsyncSession):
    """Return the dataset's Edge rows, de-duplicated by relationship_name.

    NOTE(review): `.distinct(<column>)` emits DISTINCT ON, which is
    PostgreSQL-specific — confirm the target backend supports it.
    """
    query = (
        select(Edge)
        .where(Edge.dataset_id == dataset_id)
        .distinct(Edge.relationship_name)
    )
    result = await session.scalars(query)
    return result.all()

View file

@ -0,0 +1,14 @@
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from cognee.infrastructure.databases.relational import with_async_session
from cognee.modules.graph.models import Node
@with_async_session
async def get_dataset_related_nodes(dataset_id: UUID, session: AsyncSession):
    """Return every Node row that belongs to the given dataset."""
    return (
        await session.scalars(select(Node).where(Node.dataset_id == dataset_id))
    ).all()

View file

@ -20,7 +20,7 @@ class Edge(Base):
data_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
source_node_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
destination_node_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)

View file

@ -23,7 +23,7 @@ class Node(Base):
data_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
dataset_id: Mapped[UUID] = mapped_column(UUID(as_uuid=True), index=True, nullable=False)
label: Mapped[str] = mapped_column(String(255))
type: Mapped[str] = mapped_column(String(255), nullable=False)

View file

@ -1,54 +0,0 @@
from datetime import datetime, timezone
from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession
from cognee.modules.data.models import graph_relationship_ledger
from cognee.modules.users.models import User
async def create_relationship(
    session: AsyncSession,
    source_node_id: UUID,
    destination_node_id: UUID,
    creator_function: str,
    user: User,
) -> None:
    """Record a new relationship between two graph nodes in the ledger.

    Args:
        session: Database session
        source_node_id: ID of the source node
        destination_node_id: ID of the destination node
        creator_function: Name of the function creating the relationship
        user: User creating the relationship
    """
    ledger_entry = graph_relationship_ledger(
        source_node_id=source_node_id,
        destination_node_id=destination_node_id,
        creator_function=creator_function,
        user_id=user.id,
    )
    session.add(ledger_entry)
    # Flush (not commit) so the row gets an identity within the caller's
    # transaction.
    await session.flush()
async def delete_relationship(
    session: AsyncSession,
    source_node_id: UUID,
    destination_node_id: UUID,
    user: User,
) -> None:
    """Mark a relationship as deleted (soft delete via deleted_at).

    Args:
        session: Database session
        source_node_id: ID of the source node
        destination_node_id: ID of the destination node
        user: User deleting the relationship

    BUG FIX: the original called session.get() with a
    (source_node_id, destination_node_id) tuple, but session.get() looks
    rows up by PRIMARY KEY and the ledger's primary key is its single `id`
    column — the lookup could never match. Query by the two node-id
    columns instead.
    """
    from sqlalchemy import select

    result = await session.execute(
        select(graph_relationship_ledger).where(
            graph_relationship_ledger.source_node_id == source_node_id,
            graph_relationship_ledger.destination_node_id == destination_node_id,
        )
    )
    relationship = result.scalars().first()
    if relationship:
        relationship.deleted_at = datetime.now(timezone.utc)
        session.add(relationship)
        await session.flush()

View file

@ -1,7 +1,8 @@
import os
from typing import List
async def get_non_py_files(repo_path):
async def get_non_py_files(repo_data: List):
"""
Get files that are not .py files and their contents.
@ -13,13 +14,15 @@ async def get_non_py_files(repo_path):
Parameters:
-----------
- repo_path: The file system path to the repository to scan for non-Python files.
- repo_data: The data object containing file system path to the repository to scan for non-Python files.
Returns:
--------
A list of file paths that are not Python files and meet the specified criteria.
"""
repo_path = repo_data[0].repo_path
if not os.path.exists(repo_path):
return {}

View file

@ -134,7 +134,7 @@ def run_coroutine(coroutine_func, *args, **kwargs):
async def get_repo_file_dependencies(
repo_path: str,
repo_data: List,
detailed_extraction: bool = False,
supported_languages: list = None,
excluded_paths: Optional[List[str]] = None,
@ -156,8 +156,7 @@ async def get_repo_file_dependencies(
- supported_languages (list | None): Subset of languages to include; if None, use defaults.
"""
if isinstance(repo_path, list) and len(repo_path) == 1:
repo_path = repo_path[0]
repo_path: str = repo_data[0].repo_path
if not os.path.exists(repo_path):
raise FileNotFoundError(f"Repository path {repo_path} does not exist.")

View file

@ -1,316 +0,0 @@
"""Integration test for the delete-by-id endpoint.

Exercises permission checks, error handling for missing data/datasets,
cross-dataset ownership validation, and verifies that unrelated users'
data is untouched by deletions.
"""

import os
import pathlib
import cognee
from uuid import uuid4
from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.shared.logging_utils import get_logger
from cognee.modules.users.methods import get_default_user, create_user
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
from cognee.modules.data.methods import get_dataset_data, get_datasets_by_name
from cognee.api.v1.exceptions import DocumentNotFoundError, DatasetNotFoundError

logger = get_logger()


async def main():
    """Run the delete-by-id integration scenario end to end.

    Raises
    ------
    AssertionError
        If any of the expected success / failure behaviors does not hold.
    """
    # Enable permissions feature
    os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"

    # Clean up test directories before starting
    data_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_delete_by_id")
        ).resolve()
    )
    cognee_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_delete_by_id")
        ).resolve()
    )
    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Setup database and tables
    from cognee.modules.engine.operations.setup import setup

    await setup()

    print("🧪 Testing Delete by ID and Dataset Data Endpoints")
    print("=" * 60)

    # Get the default user first
    default_user = await get_default_user()

    # Test data
    text_1 = """
    Apple Inc. is an American multinational technology company that specializes in consumer electronics,
    software, and online services. Apple is the world's largest technology company by revenue and,
    since January 2021, the world's most valuable company.
    """

    text_2 = """
    Microsoft Corporation is an American multinational technology corporation which produces computer software,
    consumer electronics, personal computers, and related services. Its best known software products are the
    Microsoft Windows line of operating systems and the Microsoft Office suite.
    """

    text_3 = """
    Google LLC is an American multinational technology company that specializes in Internet-related services and products,
    which include online advertising technologies, search engine, cloud computing, software, and hardware. Google has been
    referred to as the most powerful company in the world and one of the world's most valuable brands.
    """

    # Test 1: Setup data and datasets
    print("\n📝 Test 1: Setting up test data and datasets")

    # Add data for default user
    await cognee.add([text_1], dataset_name="tech_companies_1", user=default_user)

    # Create test user first for the second dataset
    test_user = await create_user("test_user_delete@gmail.com", "test@example.com")

    # Add data for test user
    await cognee.add([text_2], dataset_name="tech_companies_2", user=test_user)

    # Create third user for isolation testing
    isolation_user = await create_user("isolation_user@gmail.com", "isolation@example.com")

    # Add data for isolation user (should remain unaffected by other deletions)
    await cognee.add([text_3], dataset_name="tech_companies_3", user=isolation_user)

    tst = await cognee.cognify(["tech_companies_1"], user=default_user)
    tst2 = await cognee.cognify(["tech_companies_2"], user=test_user)
    tst3 = await cognee.cognify(["tech_companies_3"], user=isolation_user)

    print("tst", tst)
    print("tst2", tst2)
    print("tst3", tst3)

    # Extract dataset_ids from cognify results
    def extract_dataset_id_from_cognify(cognify_result):
        """Extract dataset_id from cognify output dictionary"""
        for dataset_id, pipeline_result in cognify_result.items():
            return dataset_id  # Return the first (and likely only) dataset_id
        return None

    # Get dataset IDs from cognify results
    dataset_id_1 = extract_dataset_id_from_cognify(tst)
    dataset_id_2 = extract_dataset_id_from_cognify(tst2)
    dataset_id_3 = extract_dataset_id_from_cognify(tst3)

    print(f"📋 Extracted dataset_id from tst: {dataset_id_1}")
    print(f"📋 Extracted dataset_id from tst2: {dataset_id_2}")
    print(f"📋 Extracted dataset_id from tst3: {dataset_id_3}")

    # Get dataset data for deletion testing
    dataset_data_1 = await get_dataset_data(dataset_id_1)
    dataset_data_2 = await get_dataset_data(dataset_id_2)
    dataset_data_3 = await get_dataset_data(dataset_id_3)

    print(f"📊 Dataset 1 contains {len(dataset_data_1)} data items")
    print(f"📊 Dataset 2 contains {len(dataset_data_2)} data items")
    print(f"📊 Dataset 3 (isolation) contains {len(dataset_data_3)} data items")

    # Test 2: Get data to delete from the extracted datasets
    print("\n📝 Test 2: Preparing data for deletion from cognify results")

    # Use the first data item from each dataset for testing
    data_to_delete_id = dataset_data_1[0].id if dataset_data_1 else None
    data_to_delete_from_test_user = dataset_data_2[0].id if dataset_data_2 else None

    # Create datasets objects for testing
    from cognee.modules.data.models import Dataset

    default_dataset = Dataset(id=dataset_id_1, name="tech_companies_1", owner_id=default_user.id)

    # Create dataset object for permission testing (test_user already created above)
    test_dataset = Dataset(id=dataset_id_2, name="tech_companies_2", owner_id=test_user.id)

    print(f"🔍 Data to delete ID: {data_to_delete_id}")
    print(f"🔍 Test user data ID: {data_to_delete_from_test_user}")

    print("\n📝 Test 3: Testing delete endpoint with proper permissions")
    try:
        result = await cognee.delete(data_id=data_to_delete_id, dataset_id=default_dataset.id)
        print("✅ Delete successful for data owner")
        assert result["status"] == "success", "Delete should succeed for data owner"
    except Exception as e:
        print(f"❌ Unexpected error in delete test: {e}")
        raise

    # Test 4: Test delete without permissions (should fail)
    print("\n📝 Test 4: Testing delete endpoint without permissions")
    delete_permission_error = False
    try:
        await cognee.delete(
            data_id=data_to_delete_from_test_user,
            dataset_id=test_dataset.id,
            user=default_user,  # Wrong user - should fail
        )
    except (PermissionDeniedError, DatasetNotFoundError):
        delete_permission_error = True
        print("✅ Delete correctly denied for user without permission")
    except Exception as e:
        print(f"❌ Unexpected error type: {e}")

    assert delete_permission_error, "Delete should fail for user without permission"

    # Test 5: Test delete with non-existent data_id
    print("\n📝 Test 5: Testing delete endpoint with non-existent data_id")
    non_existent_data_id = uuid4()
    data_not_found_error = False
    try:
        await cognee.delete(
            data_id=non_existent_data_id, dataset_id=default_dataset.id, user=default_user
        )
    except DocumentNotFoundError:
        data_not_found_error = True
        print("✅ Delete correctly failed for non-existent data_id")
    except Exception as e:
        print(f"❌ Unexpected error type: {e}")

    assert data_not_found_error, "Delete should fail for non-existent data_id"

    # Test 6: Test delete with non-existent dataset_id
    print("\n📝 Test 6: Testing delete endpoint with non-existent dataset_id")
    non_existent_dataset_id = uuid4()
    dataset_not_found_error = False
    try:
        await cognee.delete(
            data_id=data_to_delete_from_test_user,
            dataset_id=non_existent_dataset_id,
            user=test_user,
        )
    except (DatasetNotFoundError, PermissionDeniedError):
        dataset_not_found_error = True
        print("✅ Delete correctly failed for non-existent dataset_id")
    except Exception as e:
        print(f"❌ Unexpected error type: {e}")

    assert dataset_not_found_error, "Delete should fail for non-existent dataset_id"

    # Test 7: Test delete with data that doesn't belong to the dataset
    print("\n📝 Test 7: Testing delete endpoint with data not in specified dataset")

    # Add more data to create a scenario where data exists but not in the specified dataset
    await cognee.add([text_1], dataset_name="another_dataset", user=default_user)
    await cognee.cognify(["another_dataset"], user=default_user)

    another_datasets = await get_datasets_by_name(["another_dataset"], default_user.id)
    another_dataset = another_datasets[0]

    data_not_in_dataset_error = False
    try:
        # Try to delete data from test_user's dataset using default_user's data_id
        await cognee.delete(
            data_id=data_to_delete_from_test_user,  # This data belongs to test_user's dataset
            dataset_id=another_dataset.id,  # But we're specifying default_user's other dataset
            user=default_user,
        )
    except DocumentNotFoundError:
        data_not_in_dataset_error = True
        print("✅ Delete correctly failed for data not in specified dataset")
    except Exception as e:
        print(f"❌ Unexpected error type: {e}")

    assert data_not_in_dataset_error, "Delete should fail when data doesn't belong to dataset"

    # Test 8: Test permission granting and delete
    print("\n📝 Test 8: Testing delete after granting permissions")

    # Give default_user delete permission on test_user's dataset
    await authorized_give_permission_on_datasets(
        default_user.id,
        [test_dataset.id],
        "delete",
        test_user.id,
    )

    try:
        result = await cognee.delete(
            data_id=data_to_delete_from_test_user,
            dataset_id=test_dataset.id,
            user=default_user,  # Now should work with granted permission
        )
        print("✅ Delete successful after granting permission", result)
        assert result["status"] == "success", "Delete should succeed after granting permission"
    except Exception as e:
        print(f"❌ Unexpected error after granting permission: {e}")
        raise

    # Test 9: Verify graph database cleanup
    print("\n📝 Test 9: Verifying comprehensive deletion (graph, vector, relational)")
    from cognee.infrastructure.databases.graph import get_graph_engine

    graph_engine = await get_graph_engine()
    nodes, edges = await graph_engine.get_graph_data()

    # We should still have some nodes/edges from the remaining data, but fewer than before
    print(f"✅ Graph database state after deletions - Nodes: {len(nodes)}, Edges: {len(edges)}")

    # Test 10: Verify isolation user's data remains untouched
    print("\n📝 Test 10: Verifying isolation user's data remains intact")
    try:
        # Get isolation user's data after all deletions
        isolation_dataset_data_after = await get_dataset_data(dataset_id_3)
        print(
            f"📊 Isolation user's dataset still contains {len(isolation_dataset_data_after)} data items"
        )

        # Verify data count is unchanged
        assert len(isolation_dataset_data_after) == len(dataset_data_3), (
            f"Isolation user's data count changed! Expected {len(dataset_data_3)}, got {len(isolation_dataset_data_after)}"
        )

        # Verify specific data items are still there
        original_data_ids = {str(data.id) for data in dataset_data_3}
        remaining_data_ids = {str(data.id) for data in isolation_dataset_data_after}
        assert original_data_ids == remaining_data_ids, "Isolation user's data IDs have changed!"

        # Try to search isolation user's data to ensure it's still accessible
        isolation_search_results = await cognee.search(
            "Google technology company", user=isolation_user
        )
        assert len(isolation_search_results) > 0, "Isolation user's data should still be searchable"

        print("✅ Isolation user's data completely unaffected by other users' deletions")
        print(f" - Data count unchanged: {len(isolation_dataset_data_after)} items")
        print(" - All original data IDs preserved")
        print(f" - Data still searchable: {len(isolation_search_results)} results")
    except Exception as e:
        print(f"❌ Error verifying isolation user's data: {e}")
        raise

    print("\n" + "=" * 60)
    print("🎉 All tests passed! Delete by ID endpoint working correctly.")
    print("=" * 60)
    print("""
    📋 SUMMARY OF TESTED FUNCTIONALITY:
    Delete endpoint accepts data_id and dataset_id parameters
    Permission checking works for delete operations
    Proper error handling for non-existent data/datasets
    Data ownership validation (data must belong to specified dataset)
    Permission granting and revocation works correctly
    Comprehensive deletion across all databases (graph, vector, relational)
    Dataset data endpoint now checks read permissions properly
    Data isolation: Other users' data remains completely unaffected by deletions
    """)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())

View file

@ -4,6 +4,7 @@ from typing import List
from uuid import uuid4
import cognee
from cognee.api.v1.datasets import datasets
from cognee.infrastructure.engine import DataPoint
from cognee.modules.data.models import Data, Dataset
from cognee.modules.engine.operations.setup import setup
@ -90,12 +91,12 @@ async def main():
"Nodes and edges are not correctly added to the graph."
)
await delete_data_nodes_and_edges(dataset.id, data1.id) # type: ignore
await datasets.delete_data(dataset.id, data1.id, user.id) # type: ignore
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) == 2 and len(edges) == 1, "Nodes and edges are not deleted properly."
await delete_data_nodes_and_edges(dataset.id, data2.id) # type: ignore
await datasets.delete_data(dataset.id, data2.id, user.id) # type: ignore
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) == 0 and len(edges) == 0, "Nodes and edges are not deleted."

View file

@ -2,6 +2,7 @@ import os
import pathlib
import cognee
from cognee.api.v1.datasets import datasets
from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.databases.graph import get_graph_engine
@ -10,6 +11,7 @@ from cognee.modules.engine.operations.setup import setup
from cognee.modules.graph.methods import (
delete_data_nodes_and_edges,
)
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
logger = get_logger()
@ -58,9 +60,8 @@ async def main():
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) >= 12 and len(edges) >= 18, "Nodes and edges are not deleted."
await delete_data_nodes_and_edges(dataset_id, added_data.id) # type: ignore
await delete_data(added_data)
user = await get_default_user()
await datasets.delete_data(dataset_id, added_data.id, user.id) # type: ignore
file_path = os.path.join(
pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_after_delete.html"

View file

@ -1,85 +0,0 @@
"""Integration test: hard-delete every document in a dataset by id and verify
that the graph database is left empty afterwards."""

import os
import shutil
import cognee
import pathlib
from cognee.shared.logging_utils import get_logger
from cognee.modules.data.methods import get_dataset_data

logger = get_logger()


async def main():
    """Add a mix of document types, cognify them, hard-delete each data item
    by id, then assert the graph contains no nodes or edges.

    Raises
    ------
    AssertionError
        If the graph is not populated after cognify, the dataset is empty,
        or any nodes/edges survive the hard deletes.
    """
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    pdf_document = os.path.join(
        pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
    )
    txt_document = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
    )
    audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
    image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
    unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")

    text_document_as_literal = """
    1. Audi
    Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.

    2. BMW
    BMW, short for Bayerische Motoren Werke, is celebrated for its focus on performance and driving pleasure. The company's vehicles are designed to provide a dynamic and engaging driving experience, and their slogan, "The Ultimate Driving Machine," reflects that commitment. BMW produces a variety of cars that combine luxury with sporty performance.

    3. Mercedes-Benz
    Mercedes-Benz is synonymous with luxury and quality. With a history dating back to the early 20th century, the brand is known for its elegant designs, innovative safety features, and high-quality engineering. Mercedes-Benz manufactures not only luxury sedans but also SUVs, sports cars, and commercial vehicles, catering to a wide range of needs.

    4. Porsche
    Porsche is a name that stands for high-performance sports cars. Founded in 1931, the brand has become famous for models like the iconic Porsche 911. Porsche cars are celebrated for their speed, precision, and distinctive design, appealing to car enthusiasts who value both performance and style.

    5. Volkswagen
    Volkswagen, which means "people's car" in German, was established with the idea of making affordable and reliable vehicles accessible to everyone. Over the years, Volkswagen has produced several iconic models, such as the Beetle and the Golf. Today, it remains one of the largest car manufacturers in the world, offering a wide range of vehicles that balance practicality with quality.

    Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
    """

    # Add documents and get dataset information
    add_result = await cognee.add(
        [
            pdf_document,
            txt_document,
            text_document_as_literal,
            unstructured_document,
            audio_document,
            image_document,
        ]
    )
    dataset_id = add_result.dataset_id

    await cognee.cognify()

    from cognee.infrastructure.databases.graph import get_graph_engine

    graph_engine = await get_graph_engine()
    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."

    # Get the data IDs from the dataset
    dataset_data = await get_dataset_data(dataset_id)
    assert len(dataset_data) > 0, "Dataset should contain data"

    # Delete each document using its ID
    for data_item in dataset_data:
        await cognee.delete(data_item.id, dataset_id, mode="hard")

    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with hard delete."


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())

View file

@ -1,85 +0,0 @@
"""Integration test: soft-delete every document in a dataset by id and verify
that the graph database is left empty afterwards."""

import os
import shutil
import cognee
import pathlib
from cognee.shared.logging_utils import get_logger
from cognee.modules.data.methods import get_dataset_data

logger = get_logger()


async def main():
    """Add a mix of document types, cognify them, soft-delete each data item
    by id, then assert the graph contains no nodes or edges.

    Raises
    ------
    AssertionError
        If the graph is not populated after cognify, the dataset is empty,
        or any nodes/edges survive the soft deletes.
    """
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    pdf_document = os.path.join(
        pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
    )
    txt_document = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
    )
    audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
    image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
    unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")

    text_document_as_literal = """
    1. Audi
    Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.

    2. BMW
    BMW, short for Bayerische Motoren Werke, is celebrated for its focus on performance and driving pleasure. The company's vehicles are designed to provide a dynamic and engaging driving experience, and their slogan, "The Ultimate Driving Machine," reflects that commitment. BMW produces a variety of cars that combine luxury with sporty performance.

    3. Mercedes-Benz
    Mercedes-Benz is synonymous with luxury and quality. With a history dating back to the early 20th century, the brand is known for its elegant designs, innovative safety features, and high-quality engineering. Mercedes-Benz manufactures not only luxury sedans but also SUVs, sports cars, and commercial vehicles, catering to a wide range of needs.

    4. Porsche
    Porsche is a name that stands for high-performance sports cars. Founded in 1931, the brand has become famous for models like the iconic Porsche 911. Porsche cars are celebrated for their speed, precision, and distinctive design, appealing to car enthusiasts who value both performance and style.

    5. Volkswagen
    Volkswagen, which means "people's car" in German, was established with the idea of making affordable and reliable vehicles accessible to everyone. Over the years, Volkswagen has produced several iconic models, such as the Beetle and the Golf. Today, it remains one of the largest car manufacturers in the world, offering a wide range of vehicles that balance practicality with quality.

    Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
    """

    # Add documents and get dataset information
    add_result = await cognee.add(
        [
            pdf_document,
            txt_document,
            text_document_as_literal,
            unstructured_document,
            audio_document,
            image_document,
        ]
    )
    dataset_id = add_result.dataset_id

    await cognee.cognify()

    from cognee.infrastructure.databases.graph import get_graph_engine

    graph_engine = await get_graph_engine()
    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."

    # Get the data IDs from the dataset
    dataset_data = await get_dataset_data(dataset_id)
    assert len(dataset_data) > 0, "Dataset should contain data"

    # Delete each document using its ID
    for data_item in dataset_data:
        await cognee.delete(data_item.id, dataset_id, mode="soft")

    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with soft delete."


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())

View file

@ -188,8 +188,10 @@ async def main():
test_user_dataset_data = await get_dataset_data(test_user_dataset_id)
text_data_id = test_user_dataset_data[0].id
await cognee.delete(
data_id=text_data_id, dataset_id=test_user_dataset_id, user=default_user
await cognee.datasets.delete_data(
dataset_id=test_user_dataset_id,
data_id=text_data_id,
user_id=default_user.id,
)
except PermissionDeniedError:
delete_error = True
@ -201,7 +203,9 @@ async def main():
test_user_dataset_data = await get_dataset_data(test_user_dataset_id)
text_data_id = test_user_dataset_data[0].id
await cognee.delete(data_id=text_data_id, dataset_id=test_user_dataset_id, user=test_user)
await cognee.datasets.delete_data(
dataset_id=test_user_dataset_id, data_id=text_data_id, user_id=test_user.id
)
# Actually give permission to default_user to delete data for test_users dataset
await authorized_give_permission_on_datasets(
@ -216,8 +220,10 @@ async def main():
test_user_dataset_data = await get_dataset_data(test_user_dataset_id)
explanation_file_data_id = test_user_dataset_data[0].id
await cognee.delete(
data_id=explanation_file_data_id, dataset_id=test_user_dataset_id, user=default_user
await cognee.datasets.delete_data(
dataset_id=test_user_dataset_id,
data_id=explanation_file_data_id,
user_id=default_user.id,
)

View file

@ -11,7 +11,7 @@ from cognee import prune
# from cognee import visualize_graph
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.low_level import setup, DataPoint
from cognee.modules.data.models import Data, Dataset
from cognee.modules.data.models import Dataset
from cognee.modules.users.methods import get_default_user
from cognee.pipelines import run_tasks, Task
from cognee.tasks.storage import add_data_points