From 2603e99005ed3ecd94cb154ca07b8f66cf2d8f7d Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 19 Aug 2025 17:13:58 +0800 Subject: [PATCH] Enhance file deletion to remove files from both input and enqueued dirs --- lightrag/api/routers/document_routes.py | 49 ++++++++++++++++++------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index c9dc2617..bc4cb98e 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -1428,30 +1428,51 @@ async def background_delete_documents( and result.file_path != "unknown_source" ): try: + deleted_files = [] + # check and delete files from input_dir directory file_path = doc_manager.input_dir / result.file_path if file_path.exists(): file_path.unlink() - file_delete_msg = ( - f"Successfully deleted file: {result.file_path}" - ) + deleted_files.append(file_path.name) + file_delete_msg = f"Successfully deleted input_dir file: {result.file_path}" logger.info(file_delete_msg) async with pipeline_status_lock: pipeline_status["latest_message"] = file_delete_msg pipeline_status["history_messages"].append( file_delete_msg ) - else: - file_not_found_msg = ( - f"File not found for deletion: {result.file_path}" - ) - logger.warning(file_not_found_msg) + + # Also check and delete files from __enqueued__ directory + enqueued_dir = doc_manager.input_dir / "__enqueued__" + if enqueued_dir.exists(): + # Look for files with the same name or similar names (with numeric suffixes) + base_name = Path(result.file_path).stem + extension = Path(result.file_path).suffix + + # Search for exact match and files with numeric suffixes + for enqueued_file in enqueued_dir.glob( + f"{base_name}*{extension}" + ): + try: + enqueued_file.unlink() + deleted_files.append(enqueued_file.name) + logger.info( + f"Deleted enqueued file: {enqueued_file.name}" + ) + except Exception as enqueued_error: + logger.error( + f"Failed to delete enqueued file {enqueued_file.name}: {str(enqueued_error)}" + ) + + if not deleted_files: + file_error_msg = f"File deletion skipped, missing file: {result.file_path}" + logger.warning(file_error_msg) async with pipeline_status_lock: - pipeline_status["latest_message"] = ( - file_not_found_msg - ) + pipeline_status["latest_message"] = file_error_msg pipeline_status["history_messages"].append( - file_not_found_msg + file_error_msg ) + except Exception as file_error: file_error_msg = f"Failed to delete file {result.file_path}: {str(file_error)}" logger.error(file_error_msg) @@ -1461,7 +1482,9 @@ async def background_delete_documents( file_error_msg ) elif delete_file: - no_file_msg = f"No valid file path found for document {doc_id}" + no_file_msg = ( + f"File deletion skipped, missing file path: {doc_id}" + ) logger.warning(no_file_msg) async with pipeline_status_lock: pipeline_status["latest_message"] = no_file_msg