diff --git a/cognee/api/v1/delete/delete.py b/cognee/api/v1/delete/delete.py
index a7eb88892..89f0eeb29 100644
--- a/cognee/api/v1/delete/delete.py
+++ b/cognee/api/v1/delete/delete.py
@@ -84,6 +84,12 @@ async def delete(
     # Get the content hash for deletion
     content_hash = data_point.content_hash
 
+    # Debug logging
+    logger.info(
+        f"🔍 Retrieved from database - data_id: {data_id}, content_hash: {content_hash}"
+    )
+    logger.info(f"🔍 Document name in database: {data_point.name}")
+
     # Use the existing comprehensive deletion logic
     return await delete_single_document(content_hash, dataset.id, mode)
 
diff --git a/cognee/tasks/ingestion/adapters/loader_to_ingestion_adapter.py b/cognee/tasks/ingestion/adapters/loader_to_ingestion_adapter.py
index bef3ce85f..744313dde 100644
--- a/cognee/tasks/ingestion/adapters/loader_to_ingestion_adapter.py
+++ b/cognee/tasks/ingestion/adapters/loader_to_ingestion_adapter.py
@@ -27,13 +27,9 @@ class LoaderResultToIngestionData(IngestionData):
         """
         Get content identifier for deduplication.
 
-        Uses the loader result's source info or generates hash from content.
+        Always generates hash from content to ensure consistency with existing system.
         """
-        # Try to get file hash from metadata first
-        if "content_hash" in self.loader_result.metadata:
-            return self.loader_result.metadata["content_hash"]
-
-        # Fallback: generate hash from content
+        # Always generate hash from content for consistency
         import hashlib
 
         content_bytes = self.loader_result.content.encode("utf-8")
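
A minimal standalone sketch of the behavior the second hunk aims for: the deduplication identifier is always derived from the content bytes, never taken from loader metadata, so ingestion and deletion (which looks records up by content_hash, as in delete.py above) agree on the same value. The hash algorithm, class, and attribute names below are simplified assumptions for illustration, not the project's actual adapter code.

    # Sketch only: hash choice and names are assumptions, not cognee's real adapter.
    import hashlib
    from dataclasses import dataclass, field


    @dataclass
    class LoaderResult:
        content: str
        metadata: dict = field(default_factory=dict)


    def get_content_identifier(result: LoaderResult) -> str:
        # Ignore any metadata-provided hash so the identifier is always
        # computed the same way, from the content bytes themselves.
        content_bytes = result.content.encode("utf-8")
        return hashlib.sha256(content_bytes).hexdigest()


    # Identical content always yields the same identifier, even when stale
    # metadata is present, so a later delete-by-content-hash finds the record.
    assert get_content_identifier(LoaderResult("same text")) == get_content_identifier(
        LoaderResult("same text", metadata={"content_hash": "stale-value"})
    )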