Fixes to the delete flow

This commit is contained in:
vasilije 2025-07-19 18:50:34 +02:00
parent 411e9a6205
commit 9d423f5e16
2 changed files with 8 additions and 6 deletions

View file

@@ -84,6 +84,12 @@ async def delete(
# Get the content hash for deletion # Get the content hash for deletion
content_hash = data_point.content_hash content_hash = data_point.content_hash
# Debug logging
logger.info(
f"🔍 Retrieved from database - data_id: {data_id}, content_hash: {content_hash}"
)
logger.info(f"🔍 Document name in database: {data_point.name}")
# Use the existing comprehensive deletion logic # Use the existing comprehensive deletion logic
return await delete_single_document(content_hash, dataset.id, mode) return await delete_single_document(content_hash, dataset.id, mode)

View file

@@ -27,13 +27,9 @@ class LoaderResultToIngestionData(IngestionData):
""" """
Get content identifier for deduplication. Get content identifier for deduplication.
- Uses the loader result's source info or generates hash from content.
+ Always generates hash from content to ensure consistency with existing system.
""" """
- # Try to get file hash from metadata first
+ # Always generate hash from content for consistency
if "content_hash" in self.loader_result.metadata:
return self.loader_result.metadata["content_hash"]
# Fallback: generate hash from content
import hashlib import hashlib
content_bytes = self.loader_result.content.encode("utf-8") content_bytes = self.loader_result.content.encode("utf-8")