Merge pull request #2244 from danielaskdd/del-doc-cache

Feat: Add Optional LLM Cache Deletion for Document Deletion
Daniel.y authored on 2025-10-22 12:58:09 +08:00 (committed via GitHub)
commit 20edd32950
10 changed files with 120 additions and 20 deletions
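
In practice the new flag is just one extra boolean in the delete request body. A minimal client-side sketch (the endpoint path and field names come from the diff below; the base URL and the use of `requests` are assumptions for illustration):

```python
import requests

BASE_URL = "http://localhost:9621"  # assumed LightRAG server address

# DELETE /documents/delete_document now accepts delete_llm_cache.
# doc_ids and delete_file already existed; delete_llm_cache defaults
# to False, so existing clients are unaffected.
response = requests.delete(
    f"{BASE_URL}/documents/delete_document",
    json={
        "doc_ids": ["doc-123"],    # hypothetical document ID
        "delete_file": False,
        "delete_llm_cache": True,  # the new opt-in behavior
    },
)
print(response.json())  # e.g. {"status": "deletion_started", ...}
```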


@@ -1 +1 @@
-__api_version__ = "0242"
+__api_version__ = "0243"


@@ -336,6 +336,10 @@ class DeleteDocRequest(BaseModel):
         default=False,
         description="Whether to delete the corresponding file in the upload directory.",
     )
+    delete_llm_cache: bool = Field(
+        default=False,
+        description="Whether to delete cached LLM extraction results for the documents.",
+    )
 
     @field_validator("doc_ids", mode="after")
     @classmethod
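
A reduced sketch of the extended request model, showing that the new field is opt-in (field names are from the hunk above; the real class carries more fields and validators):

```python
from typing import List

from pydantic import BaseModel, Field

class DeleteDocRequest(BaseModel):
    # Reduced for illustration; the real model also validates doc_ids.
    doc_ids: List[str]
    delete_file: bool = Field(default=False)
    delete_llm_cache: bool = Field(default=False)

req = DeleteDocRequest(doc_ids=["doc-123"])
assert req.delete_llm_cache is False  # stays off unless explicitly requested
```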
@@ -1487,6 +1491,7 @@ async def background_delete_documents(
     doc_manager: DocumentManager,
     doc_ids: List[str],
     delete_file: bool = False,
+    delete_llm_cache: bool = False,
 ):
     """Background task to delete multiple documents"""
     from lightrag.kg.shared_storage import (
@@ -1521,6 +1526,10 @@ async def background_delete_documents(
         )
         # Use slice assignment to clear the list in place
         pipeline_status["history_messages"][:] = ["Starting document deletion process"]
+        if delete_llm_cache:
+            pipeline_status["history_messages"].append(
+                "LLM cache cleanup requested for this deletion job"
+            )
 
     try:
         # Loop through each document ID and delete them one by one
@@ -1534,7 +1543,9 @@ async def background_delete_documents(
 
             file_path = "#"
             try:
-                result = await rag.adelete_by_doc_id(doc_id)
+                result = await rag.adelete_by_doc_id(
+                    doc_id, delete_llm_cache=delete_llm_cache
+                )
                 file_path = (
                     getattr(result, "file_path", "-") if "result" in locals() else "-"
                 )
@@ -2344,21 +2355,20 @@ def create_document_routes(
         Delete documents and all their associated data by their IDs using background processing.
 
         Deletes specific documents and all their associated data, including their status,
-        text chunks, vector embeddings, and any related graph data.
+        text chunks, vector embeddings, and any related graph data. When requested,
+        cached LLM extraction responses are removed after graph deletion/rebuild completes.
 
         The deletion process runs in the background to avoid blocking the client connection.
-        It is disabled when llm cache for entity extraction is disabled.
         This operation is irreversible and will interact with the pipeline status.
 
         Args:
-            delete_request (DeleteDocRequest): The request containing the document IDs and delete_file options.
+            delete_request (DeleteDocRequest): The request containing the document IDs and deletion options.
            background_tasks: FastAPI BackgroundTasks for async processing
 
         Returns:
             DeleteDocByIdResponse: The result of the deletion operation.
             - status="deletion_started": The document deletion has been initiated in the background.
             - status="busy": The pipeline is busy with another operation.
-            - status="not_allowed": Operation not allowed when LLM cache for entity extraction is disabled.
 
         Raises:
             HTTPException:
@@ -2366,15 +2376,6 @@ def create_document_routes(
         """
         doc_ids = delete_request.doc_ids
 
-        # The rag object is initialized from the server startup args,
-        # so we can access its properties here.
-        if not rag.enable_llm_cache_for_entity_extract:
-            return DeleteDocByIdResponse(
-                status="not_allowed",
-                message="Operation not allowed when LLM cache for entity extraction is disabled.",
-                doc_id=", ".join(delete_request.doc_ids),
-            )
-
         try:
             from lightrag.kg.shared_storage import get_namespace_data
 
@@ -2395,6 +2396,7 @@ def create_document_routes(
                 doc_manager,
                 doc_ids,
                 delete_request.delete_file,
+                delete_request.delete_llm_cache,
             )
 
             return DeleteDocByIdResponse(
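
With the `not_allowed` gate removed, a caller only needs to handle the two remaining statuses documented above. A hedged sketch, continuing the earlier request example (response shape per the docstring; variable names assumed):

```python
# result is the parsed JSON body of the delete request shown earlier.
result = response.json()

if result["status"] == "deletion_started":
    # Deletion (and optional LLM cache cleanup) now runs in the background;
    # progress is reported through the pipeline status messages.
    print("Deletion started:", result.get("message"))
elif result["status"] == "busy":
    # The pipeline is occupied by another operation; retry later.
    print("Pipeline busy:", result.get("message"))
```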


@@ -2793,7 +2793,9 @@ class LightRAG:
         # Return the dictionary containing statuses only for the found document IDs
         return found_statuses
 
-    async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult:
+    async def adelete_by_doc_id(
+        self, doc_id: str, delete_llm_cache: bool = False
+    ) -> DeletionResult:
         """Delete a document and all its related data, including chunks, graph elements.
 
         This method orchestrates a comprehensive deletion process for a given document ID.
@@ -2803,6 +2805,8 @@ class LightRAG:
 
         Args:
             doc_id (str): The unique identifier of the document to be deleted.
+            delete_llm_cache (bool): Whether to delete cached LLM extraction results
+                associated with the document. Defaults to False.
 
         Returns:
             DeletionResult: An object containing the outcome of the deletion process.
@@ -2814,6 +2818,7 @@ class LightRAG:
         """
         deletion_operations_started = False
         original_exception = None
+        doc_llm_cache_ids: list[str] = []
 
         # Get pipeline status shared data and lock for status updates
         pipeline_status = await get_namespace_data("pipeline_status")
@@ -2914,6 +2919,57 @@ class LightRAG:
             # Mark that deletion operations have started
             deletion_operations_started = True
 
+            if delete_llm_cache and chunk_ids:
+                if not self.llm_response_cache:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because cache storage is unavailable",
+                        doc_id,
+                    )
+                elif not self.text_chunks:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because text chunk storage is unavailable",
+                        doc_id,
+                    )
+                else:
+                    try:
+                        chunk_data_list = await self.text_chunks.get_by_ids(
+                            list(chunk_ids)
+                        )
+                        seen_cache_ids: set[str] = set()
+                        for chunk_data in chunk_data_list:
+                            if not chunk_data or not isinstance(chunk_data, dict):
+                                continue
+                            cache_ids = chunk_data.get("llm_cache_list", [])
+                            if not isinstance(cache_ids, list):
+                                continue
+                            for cache_id in cache_ids:
+                                if (
+                                    isinstance(cache_id, str)
+                                    and cache_id
+                                    and cache_id not in seen_cache_ids
+                                ):
+                                    doc_llm_cache_ids.append(cache_id)
+                                    seen_cache_ids.add(cache_id)
+                        if doc_llm_cache_ids:
+                            logger.info(
+                                "Collected %d LLM cache entries for document %s",
+                                len(doc_llm_cache_ids),
+                                doc_id,
+                            )
+                        else:
+                            logger.info(
+                                "No LLM cache entries found for document %s", doc_id
+                            )
+                    except Exception as cache_collect_error:
+                        logger.error(
+                            "Failed to collect LLM cache ids for document %s: %s",
+                            doc_id,
+                            cache_collect_error,
+                        )
+                        raise Exception(
+                            f"Failed to collect LLM cache ids for document {doc_id}: {cache_collect_error}"
+                        ) from cache_collect_error
+
             # 4. Analyze entities and relationships that will be affected
             entities_to_delete = set()
             entities_to_rebuild = {}  # entity_name -> remaining chunk id list
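
The collection step above boils down to deduplicating the `llm_cache_list` entries across all of the document's chunks while skipping malformed records. The same logic as a standalone sketch (the sample chunk records are invented for illustration):

```python
def collect_llm_cache_ids(chunk_data_list: list) -> list[str]:
    """Gather unique, non-empty string cache IDs in first-seen order."""
    seen: set[str] = set()
    collected: list[str] = []
    for chunk in chunk_data_list:
        if not isinstance(chunk, dict):
            continue  # skip missing or malformed chunk records
        cache_ids = chunk.get("llm_cache_list", [])
        if not isinstance(cache_ids, list):
            continue  # tolerate unexpected field types
        for cache_id in cache_ids:
            if isinstance(cache_id, str) and cache_id and cache_id not in seen:
                collected.append(cache_id)
                seen.add(cache_id)
    return collected

# Invented sample: two chunks sharing one cache entry.
chunks = [
    {"llm_cache_list": ["cache-a", "cache-b"]},
    {"llm_cache_list": ["cache-b", "cache-c"]},
]
assert collect_llm_cache_ids(chunks) == ["cache-a", "cache-b", "cache-c"]
```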
@@ -3236,6 +3292,23 @@ class LightRAG:
             logger.error(f"Failed to delete document and status: {e}")
             raise Exception(f"Failed to delete document and status: {e}") from e
 
+        if delete_llm_cache and doc_llm_cache_ids and self.llm_response_cache:
+            try:
+                await self.llm_response_cache.delete(doc_llm_cache_ids)
+                cache_log_message = f"Successfully deleted {len(doc_llm_cache_ids)} LLM cache entries for document {doc_id}"
+                logger.info(cache_log_message)
+                async with pipeline_status_lock:
+                    pipeline_status["latest_message"] = cache_log_message
+                    pipeline_status["history_messages"].append(cache_log_message)
+                log_message = cache_log_message
+            except Exception as cache_delete_error:
+                log_message = f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}"
+                logger.error(log_message)
+                logger.error(traceback.format_exc())
+                async with pipeline_status_lock:
+                    pipeline_status["latest_message"] = log_message
+                    pipeline_status["history_messages"].append(log_message)
+
         return DeletionResult(
             status="success",
             doc_id=doc_id,
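
For library users calling LightRAG directly rather than through the HTTP API, the same behavior is reachable via `adelete_by_doc_id`. A minimal sketch (assumes an already-initialized LightRAG instance named `rag`):

```python
import asyncio

async def delete_with_cache_cleanup(rag, doc_id: str) -> None:
    # delete_llm_cache=True removes the document's cached LLM extraction
    # entries after the graph deletion/rebuild has completed; per the hunk
    # above, a cache-deletion failure is logged but does not change the
    # returned status.
    result = await rag.adelete_by_doc_id(doc_id, delete_llm_cache=True)
    print(result.status, result.doc_id)

# asyncio.run(delete_with_cache_cleanup(rag, "doc-123"))
```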


@@ -618,9 +618,13 @@ export const clearCache = async (): Promise<{
   return response.data
 }
 
-export const deleteDocuments = async (docIds: string[], deleteFile: boolean = false): Promise<DeleteDocResponse> => {
+export const deleteDocuments = async (
+  docIds: string[],
+  deleteFile: boolean = false,
+  deleteLLMCache: boolean = false
+): Promise<DeleteDocResponse> => {
   const response = await axiosInstance.delete('/documents/delete_document', {
-    data: { doc_ids: docIds, delete_file: deleteFile }
+    data: { doc_ids: docIds, delete_file: deleteFile, delete_llm_cache: deleteLLMCache }
   })
   return response.data
 }


@@ -44,6 +44,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet
   const [confirmText, setConfirmText] = useState('')
   const [deleteFile, setDeleteFile] = useState(false)
   const [isDeleting, setIsDeleting] = useState(false)
+  const [deleteLLMCache, setDeleteLLMCache] = useState(false)
 
   const isConfirmEnabled = confirmText.toLowerCase() === 'yes' && !isDeleting
   // Reset state when dialog closes
@@ -51,6 +52,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet
     if (!open) {
       setConfirmText('')
       setDeleteFile(false)
+      setDeleteLLMCache(false)
       setIsDeleting(false)
     }
   }, [open])
@@ -60,7 +62,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet
 
     setIsDeleting(true)
     try {
-      const result = await deleteDocuments(selectedDocIds, deleteFile)
+      const result = await deleteDocuments(selectedDocIds, deleteFile, deleteLLMCache)
 
       if (result.status === 'deletion_started') {
         toast.success(t('documentPanel.deleteDocuments.success', { count: selectedDocIds.length }))
@@ -94,7 +96,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet
     } finally {
       setIsDeleting(false)
     }
-  }, [isConfirmEnabled, selectedDocIds, deleteFile, setOpen, t, onDocumentsDeleted])
+  }, [isConfirmEnabled, selectedDocIds, deleteFile, deleteLLMCache, setOpen, t, onDocumentsDeleted])
 
   return (
     <Dialog open={open} onOpenChange={setOpen}>
@@ -155,6 +157,20 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet
               {t('documentPanel.deleteDocuments.deleteFileOption')}
             </Label>
           </div>
+          <div className="flex items-center space-x-2">
+            <input
+              type="checkbox"
+              id="delete-llm-cache"
+              checked={deleteLLMCache}
+              onChange={(e) => setDeleteLLMCache(e.target.checked)}
+              disabled={isDeleting}
+              className="h-4 w-4 text-red-600 focus:ring-red-500 border-gray-300 rounded"
+            />
+            <Label htmlFor="delete-llm-cache" className="text-sm font-medium cursor-pointer">
+              {t('documentPanel.deleteDocuments.deleteLLMCacheOption')}
+            </Label>
+          </div>
+
         </div>
 
         <DialogFooter>


@@ -70,6 +70,7 @@
       "confirmButton": "نعم",
       "deleteFileOption": "حذف الملفات المرفوعة أيضًا",
       "deleteFileTooltip": "حدد هذا الخيار لحذف الملفات المرفوعة المقابلة على الخادم أيضًا",
+      "deleteLLMCacheOption": "حذف ذاكرة LLM المؤقتة للاستخراج أيضًا",
       "success": "تم بدء تشغيل خط معالجة حذف المستندات بنجاح",
       "failed": "فشل حذف المستندات:\n{{message}}",
       "error": "فشل حذف المستندات:\n{{error}}",


@@ -70,6 +70,7 @@
       "confirmButton": "YES",
       "deleteFileOption": "Also delete uploaded files",
       "deleteFileTooltip": "Check this option to also delete the corresponding uploaded files on the server",
+      "deleteLLMCacheOption": "Also delete extracted LLM cache",
       "success": "Document deletion pipeline started successfully",
       "failed": "Delete Documents Failed:\n{{message}}",
       "error": "Delete Documents Failed:\n{{error}}",


@@ -70,6 +70,7 @@
       "confirmButton": "OUI",
       "deleteFileOption": "Supprimer également les fichiers téléchargés",
       "deleteFileTooltip": "Cochez cette option pour supprimer également les fichiers téléchargés correspondants sur le serveur",
+      "deleteLLMCacheOption": "Supprimer également le cache LLM d'extraction",
       "success": "Pipeline de suppression de documents démarré avec succès",
       "failed": "Échec de la suppression des documents :\n{{message}}",
       "error": "Échec de la suppression des documents :\n{{error}}",


@@ -70,6 +70,7 @@
       "confirmButton": "确定",
       "deleteFileOption": "同时删除上传文件",
       "deleteFileTooltip": "选中此选项将同时删除服务器上对应的上传文件",
+      "deleteLLMCacheOption": "同时删除实体关系抽取 LLM 缓存",
       "success": "文档删除流水线启动成功",
       "failed": "删除文档失败:\n{{message}}",
       "error": "删除文档失败:\n{{error}}",


@@ -70,6 +70,7 @@
       "confirmButton": "確定",
       "deleteFileOption": "同時刪除上傳檔案",
       "deleteFileTooltip": "選取此選項將同時刪除伺服器上對應的上傳檔案",
+      "deleteLLMCacheOption": "同時刪除實體關係擷取 LLM 快取",
       "success": "文件刪除流水線啟動成功",
       "failed": "刪除文件失敗:\n{{message}}",
       "error": "刪除文件失敗:\n{{error}}",