From 162370b6e618b4e7528288546e0a38b754e42f64 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 22 Oct 2025 12:19:23 +0800 Subject: [PATCH 1/4] Add optional LLM cache deletion when deleting documents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add delete_llm_cache parameter to API • Collect cache IDs from text chunks • Delete cache after graph operations • Update UI with new checkbox option • Add i18n translations for cache option --- lightrag/api/routers/document_routes.py | 30 ++++---- lightrag/lightrag.py | 77 ++++++++++++++++++- lightrag_webui/src/api/lightrag.ts | 8 +- .../documents/DeleteDocumentsDialog.tsx | 20 ++++- lightrag_webui/src/locales/ar.json | 1 + lightrag_webui/src/locales/en.json | 1 + lightrag_webui/src/locales/fr.json | 1 + lightrag_webui/src/locales/zh.json | 1 + lightrag_webui/src/locales/zh_TW.json | 1 + 9 files changed, 121 insertions(+), 19 deletions(-) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 0ed5a711..848d5eb8 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -336,6 +336,10 @@ class DeleteDocRequest(BaseModel): default=False, description="Whether to delete the corresponding file in the upload directory.", ) + delete_llm_cache: bool = Field( + default=False, + description="Whether to delete cached LLM extraction results for the documents.", + ) @field_validator("doc_ids", mode="after") @classmethod @@ -1487,6 +1491,7 @@ async def background_delete_documents( doc_manager: DocumentManager, doc_ids: List[str], delete_file: bool = False, + delete_llm_cache: bool = False, ): """Background task to delete multiple documents""" from lightrag.kg.shared_storage import ( @@ -1521,6 +1526,10 @@ async def background_delete_documents( ) # Use slice assignment to clear the list in place pipeline_status["history_messages"][:] = ["Starting document deletion process"] + if delete_llm_cache: + pipeline_status["history_messages"].append( + "LLM cache cleanup requested for this deletion job" + ) try: # Loop through each document ID and delete them one by one @@ -1534,7 +1543,9 @@ async def background_delete_documents( file_path = "#" try: - result = await rag.adelete_by_doc_id(doc_id) + result = await rag.adelete_by_doc_id( + doc_id, delete_llm_cache=delete_llm_cache + ) file_path = ( getattr(result, "file_path", "-") if "result" in locals() else "-" ) @@ -2344,21 +2355,20 @@ def create_document_routes( Delete documents and all their associated data by their IDs using background processing. Deletes specific documents and all their associated data, including their status, - text chunks, vector embeddings, and any related graph data. + text chunks, vector embeddings, and any related graph data. When requested, + cached LLM extraction responses are removed after graph deletion/rebuild completes. The deletion process runs in the background to avoid blocking the client connection. - It is disabled when llm cache for entity extraction is disabled. This operation is irreversible and will interact with the pipeline status. Args: - delete_request (DeleteDocRequest): The request containing the document IDs and delete_file options. + delete_request (DeleteDocRequest): The request containing the document IDs and deletion options. background_tasks: FastAPI BackgroundTasks for async processing Returns: DeleteDocByIdResponse: The result of the deletion operation. - status="deletion_started": The document deletion has been initiated in the background. - status="busy": The pipeline is busy with another operation. - - status="not_allowed": Operation not allowed when LLM cache for entity extraction is disabled. Raises: HTTPException: @@ -2366,15 +2376,6 @@ def create_document_routes( """ doc_ids = delete_request.doc_ids - # The rag object is initialized from the server startup args, - # so we can access its properties here. - if not rag.enable_llm_cache_for_entity_extract: - return DeleteDocByIdResponse( - status="not_allowed", - message="Operation not allowed when LLM cache for entity extraction is disabled.", - doc_id=", ".join(delete_request.doc_ids), - ) - try: from lightrag.kg.shared_storage import get_namespace_data @@ -2395,6 +2396,7 @@ def create_document_routes( doc_manager, doc_ids, delete_request.delete_file, + delete_request.delete_llm_cache, ) return DeleteDocByIdResponse( diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index afd1de76..46d31ca2 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -2793,7 +2793,9 @@ class LightRAG: # Return the dictionary containing statuses only for the found document IDs return found_statuses - async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult: + async def adelete_by_doc_id( + self, doc_id: str, delete_llm_cache: bool = False + ) -> DeletionResult: """Delete a document and all its related data, including chunks, graph elements. This method orchestrates a comprehensive deletion process for a given document ID. @@ -2803,6 +2805,8 @@ class LightRAG: Args: doc_id (str): The unique identifier of the document to be deleted. + delete_llm_cache (bool): Whether to delete cached LLM extraction results + associated with the document. Defaults to False. Returns: DeletionResult: An object containing the outcome of the deletion process. @@ -2814,6 +2818,7 @@ class LightRAG: """ deletion_operations_started = False original_exception = None + doc_llm_cache_ids: list[str] = [] # Get pipeline status shared data and lock for status updates pipeline_status = await get_namespace_data("pipeline_status") @@ -2914,6 +2919,57 @@ class LightRAG: # Mark that deletion operations have started deletion_operations_started = True + if delete_llm_cache and chunk_ids: + if not self.llm_response_cache: + logger.info( + "Skipping LLM cache collection for document %s because cache storage is unavailable", + doc_id, + ) + elif not self.text_chunks: + logger.info( + "Skipping LLM cache collection for document %s because text chunk storage is unavailable", + doc_id, + ) + else: + try: + chunk_data_list = await self.text_chunks.get_by_ids( + list(chunk_ids) + ) + seen_cache_ids: set[str] = set() + for chunk_data in chunk_data_list: + if not chunk_data or not isinstance(chunk_data, dict): + continue + cache_ids = chunk_data.get("llm_cache_list", []) + if not isinstance(cache_ids, list): + continue + for cache_id in cache_ids: + if ( + isinstance(cache_id, str) + and cache_id + and cache_id not in seen_cache_ids + ): + doc_llm_cache_ids.append(cache_id) + seen_cache_ids.add(cache_id) + if doc_llm_cache_ids: + logger.info( + "Collected %d LLM cache entries for document %s", + len(doc_llm_cache_ids), + doc_id, + ) + else: + logger.info( + "No LLM cache entries found for document %s", doc_id + ) + except Exception as cache_collect_error: + logger.error( + "Failed to collect LLM cache ids for document %s: %s", + doc_id, + cache_collect_error, + ) + raise Exception( + f"Failed to collect LLM cache ids for document {doc_id}: {cache_collect_error}" + ) from cache_collect_error + # 4. Analyze entities and relationships that will be affected entities_to_delete = set() entities_to_rebuild = {} # entity_name -> remaining chunk id list @@ -3236,6 +3292,25 @@ class LightRAG: logger.error(f"Failed to delete document and status: {e}") raise Exception(f"Failed to delete document and status: {e}") from e + if delete_llm_cache and doc_llm_cache_ids and self.llm_response_cache: + try: + await self.llm_response_cache.delete(doc_llm_cache_ids) + cache_log_message = f"Successfully deleted {len(doc_llm_cache_ids)} LLM cache entries for document {doc_id}" + logger.info(cache_log_message) + async with pipeline_status_lock: + pipeline_status["latest_message"] = cache_log_message + pipeline_status["history_messages"].append(cache_log_message) + log_message = cache_log_message + except Exception as cache_delete_error: + logger.error( + "Failed to delete LLM cache for document %s: %s", + doc_id, + cache_delete_error, + ) + raise Exception( + f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}" + ) from cache_delete_error + return DeletionResult( status="success", doc_id=doc_id, diff --git a/lightrag_webui/src/api/lightrag.ts b/lightrag_webui/src/api/lightrag.ts index e0af248e..cf9a7e7a 100644 --- a/lightrag_webui/src/api/lightrag.ts +++ b/lightrag_webui/src/api/lightrag.ts @@ -618,9 +618,13 @@ export const clearCache = async (): Promise<{ return response.data } -export const deleteDocuments = async (docIds: string[], deleteFile: boolean = false): Promise => { +export const deleteDocuments = async ( + docIds: string[], + deleteFile: boolean = false, + deleteLLMCache: boolean = false +): Promise => { const response = await axiosInstance.delete('/documents/delete_document', { - data: { doc_ids: docIds, delete_file: deleteFile } + data: { doc_ids: docIds, delete_file: deleteFile, delete_llm_cache: deleteLLMCache } }) return response.data } diff --git a/lightrag_webui/src/components/documents/DeleteDocumentsDialog.tsx b/lightrag_webui/src/components/documents/DeleteDocumentsDialog.tsx index 65305ccb..eb5b173f 100644 --- a/lightrag_webui/src/components/documents/DeleteDocumentsDialog.tsx +++ b/lightrag_webui/src/components/documents/DeleteDocumentsDialog.tsx @@ -44,6 +44,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet const [confirmText, setConfirmText] = useState('') const [deleteFile, setDeleteFile] = useState(false) const [isDeleting, setIsDeleting] = useState(false) + const [deleteLLMCache, setDeleteLLMCache] = useState(false) const isConfirmEnabled = confirmText.toLowerCase() === 'yes' && !isDeleting // Reset state when dialog closes @@ -51,6 +52,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet if (!open) { setConfirmText('') setDeleteFile(false) + setDeleteLLMCache(false) setIsDeleting(false) } }, [open]) @@ -60,7 +62,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet setIsDeleting(true) try { - const result = await deleteDocuments(selectedDocIds, deleteFile) + const result = await deleteDocuments(selectedDocIds, deleteFile, deleteLLMCache) if (result.status === 'deletion_started') { toast.success(t('documentPanel.deleteDocuments.success', { count: selectedDocIds.length })) @@ -94,7 +96,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet } finally { setIsDeleting(false) } - }, [isConfirmEnabled, selectedDocIds, deleteFile, setOpen, t, onDocumentsDeleted]) + }, [isConfirmEnabled, selectedDocIds, deleteFile, deleteLLMCache, setOpen, t, onDocumentsDeleted]) return ( @@ -155,6 +157,20 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet {t('documentPanel.deleteDocuments.deleteFileOption')} + +
+ setDeleteLLMCache(e.target.checked)} + disabled={isDeleting} + className="h-4 w-4 text-red-600 focus:ring-red-500 border-gray-300 rounded" + /> + +
diff --git a/lightrag_webui/src/locales/ar.json b/lightrag_webui/src/locales/ar.json index 6f2703ca..be0c82cb 100644 --- a/lightrag_webui/src/locales/ar.json +++ b/lightrag_webui/src/locales/ar.json @@ -70,6 +70,7 @@ "confirmButton": "نعم", "deleteFileOption": "حذف الملفات المرفوعة أيضًا", "deleteFileTooltip": "حدد هذا الخيار لحذف الملفات المرفوعة المقابلة على الخادم أيضًا", + "deleteLLMCacheOption": "حذف ذاكرة LLM المؤقتة للاستخراج أيضًا", "success": "تم بدء تشغيل خط معالجة حذف المستندات بنجاح", "failed": "فشل حذف المستندات:\n{{message}}", "error": "فشل حذف المستندات:\n{{error}}", diff --git a/lightrag_webui/src/locales/en.json b/lightrag_webui/src/locales/en.json index 418ac296..5ce4b3df 100644 --- a/lightrag_webui/src/locales/en.json +++ b/lightrag_webui/src/locales/en.json @@ -70,6 +70,7 @@ "confirmButton": "YES", "deleteFileOption": "Also delete uploaded files", "deleteFileTooltip": "Check this option to also delete the corresponding uploaded files on the server", + "deleteLLMCacheOption": "Also delete extracted LLM cache", "success": "Document deletion pipeline started successfully", "failed": "Delete Documents Failed:\n{{message}}", "error": "Delete Documents Failed:\n{{error}}", diff --git a/lightrag_webui/src/locales/fr.json b/lightrag_webui/src/locales/fr.json index 463f05eb..941b55de 100644 --- a/lightrag_webui/src/locales/fr.json +++ b/lightrag_webui/src/locales/fr.json @@ -70,6 +70,7 @@ "confirmButton": "OUI", "deleteFileOption": "Supprimer également les fichiers téléchargés", "deleteFileTooltip": "Cochez cette option pour supprimer également les fichiers téléchargés correspondants sur le serveur", + "deleteLLMCacheOption": "Supprimer également le cache LLM d'extraction", "success": "Pipeline de suppression de documents démarré avec succès", "failed": "Échec de la suppression des documents :\n{{message}}", "error": "Échec de la suppression des documents :\n{{error}}", diff --git a/lightrag_webui/src/locales/zh.json b/lightrag_webui/src/locales/zh.json index 40d8cdb0..3bbb31aa 100644 --- a/lightrag_webui/src/locales/zh.json +++ b/lightrag_webui/src/locales/zh.json @@ -70,6 +70,7 @@ "confirmButton": "确定", "deleteFileOption": "同时删除上传文件", "deleteFileTooltip": "选中此选项将同时删除服务器上对应的上传文件", + "deleteLLMCacheOption": "同时删除实体关系抽取 LLM 缓存", "success": "文档删除流水线启动成功", "failed": "删除文档失败:\n{{message}}", "error": "删除文档失败:\n{{error}}", diff --git a/lightrag_webui/src/locales/zh_TW.json b/lightrag_webui/src/locales/zh_TW.json index 5ea179c2..e4387e98 100644 --- a/lightrag_webui/src/locales/zh_TW.json +++ b/lightrag_webui/src/locales/zh_TW.json @@ -70,6 +70,7 @@ "confirmButton": "確定", "deleteFileOption": "同時刪除上傳檔案", "deleteFileTooltip": "選取此選項將同時刪除伺服器上對應的上傳檔案", + "deleteLLMCacheOption": "同時刪除實體關係擷取 LLM 快取", "success": "文件刪除流水線啟動成功", "failed": "刪除文件失敗:\n{{message}}", "error": "刪除文件失敗:\n{{error}}", From 1101562eaff0b0ddb3f3bb639210b09f9d1590da Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 22 Oct 2025 12:30:22 +0800 Subject: [PATCH 2/4] Bump API version to 0243 --- lightrag/api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py index e1baefb9..6268052f 100644 --- a/lightrag/api/__init__.py +++ b/lightrag/api/__init__.py @@ -1 +1 @@ -__api_version__ = "0242" +__api_version__ = "0243" From d7e2527e1a2c0306debc0919b13cc819741cf74c Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 22 Oct 2025 12:53:19 +0800 Subject: [PATCH 3/4] Handle cache deletion errors gracefully instead of raising exceptions --- lightrag/lightrag.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 46d31ca2..fbcfd58d 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -3302,14 +3302,14 @@ class LightRAG: pipeline_status["history_messages"].append(cache_log_message) log_message = cache_log_message except Exception as cache_delete_error: - logger.error( - "Failed to delete LLM cache for document %s: %s", - doc_id, - cache_delete_error, - ) - raise Exception( + log_message = ( f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}" - ) from cache_delete_error + ) + logger.error(log_message) + logger.error(traceback.format_exc()) + async with pipeline_status_lock: + pipeline_status["latest_message"] = log_message + pipeline_status["history_messages"].append(log_message) return DeletionResult( status="success", From b76350a3bc3d12f64becd2698f9236f762889679 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 22 Oct 2025 12:53:42 +0800 Subject: [PATCH 4/4] Fix linting --- lightrag/lightrag.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index fbcfd58d..ff9ce8b0 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -3302,9 +3302,7 @@ class LightRAG: pipeline_status["history_messages"].append(cache_log_message) log_message = cache_log_message except Exception as cache_delete_error: - log_message = ( - f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}" - ) + log_message = f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}" logger.error(log_message) logger.error(traceback.format_exc()) async with pipeline_status_lock: