Merge pull request #2244 from danielaskdd/del-doc-cache
Feat: Add Optional LLM Cache Deletion for Document Deletion
Commit 20edd32950: 10 changed files with 120 additions and 20 deletions

API version:

@@ -1 +1 @@
-__api_version__ = "0242"
+__api_version__ = "0243"

API server document routes:

@@ -336,6 +336,10 @@ class DeleteDocRequest(BaseModel):
         default=False,
         description="Whether to delete the corresponding file in the upload directory.",
     )
+    delete_llm_cache: bool = Field(
+        default=False,
+        description="Whether to delete cached LLM extraction results for the documents.",
+    )
 
     @field_validator("doc_ids", mode="after")
     @classmethod
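
For reference, a minimal sketch of exercising the new flag over HTTP from Python. The endpoint path and body fields match the WebUI client change later in this diff; the base URL, port, document ID, and API-key header are assumptions to adapt to your deployment:

    import requests

    resp = requests.delete(
        "http://localhost:9621/documents/delete_document",  # assumed base URL/port
        json={
            "doc_ids": ["doc-1234"],   # hypothetical document ID
            "delete_file": False,
            "delete_llm_cache": True,  # the new flag introduced by this PR
        },
        headers={"X-API-Key": "your-api-key"},  # only if auth is enabled (assumption)
        timeout=30,
    )
    print(resp.status_code, resp.json())
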
@@ -1487,6 +1491,7 @@ async def background_delete_documents(
     doc_manager: DocumentManager,
     doc_ids: List[str],
     delete_file: bool = False,
+    delete_llm_cache: bool = False,
 ):
     """Background task to delete multiple documents"""
     from lightrag.kg.shared_storage import (
@@ -1521,6 +1526,10 @@ async def background_delete_documents(
         )
         # Use slice assignment to clear the list in place
         pipeline_status["history_messages"][:] = ["Starting document deletion process"]
+        if delete_llm_cache:
+            pipeline_status["history_messages"].append(
+                "LLM cache cleanup requested for this deletion job"
+            )
 
     try:
         # Loop through each document ID and delete them one by one
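
A client can watch these history messages while the job runs. A hedged sketch polling the pipeline status route; the GET /documents/pipeline_status path and the busy/history_messages fields are assumptions based on the LightRAG API, while the cache-cleanup entry comes from the hunk above:

    import time
    import requests

    BASE = "http://localhost:9621"  # assumed base URL/port

    while True:
        status = requests.get(f"{BASE}/documents/pipeline_status", timeout=10).json()
        for msg in status.get("history_messages", []):
            print(msg)  # includes "LLM cache cleanup requested for this deletion job"
        if not status.get("busy", False):
            break
        time.sleep(2)
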
@@ -1534,7 +1543,9 @@
 
             file_path = "#"
             try:
-                result = await rag.adelete_by_doc_id(doc_id)
+                result = await rag.adelete_by_doc_id(
+                    doc_id, delete_llm_cache=delete_llm_cache
+                )
                 file_path = (
                     getattr(result, "file_path", "-") if "result" in locals() else "-"
                 )
@@ -2344,21 +2355,20 @@ def create_document_routes(
         Delete documents and all their associated data by their IDs using background processing.
 
         Deletes specific documents and all their associated data, including their status,
-        text chunks, vector embeddings, and any related graph data.
+        text chunks, vector embeddings, and any related graph data. When requested,
+        cached LLM extraction responses are removed after graph deletion/rebuild completes.
         The deletion process runs in the background to avoid blocking the client connection.
-        It is disabled when llm cache for entity extraction is disabled.
 
         This operation is irreversible and will interact with the pipeline status.
 
         Args:
-            delete_request (DeleteDocRequest): The request containing the document IDs and delete_file options.
+            delete_request (DeleteDocRequest): The request containing the document IDs and deletion options.
             background_tasks: FastAPI BackgroundTasks for async processing
 
         Returns:
             DeleteDocByIdResponse: The result of the deletion operation.
                 - status="deletion_started": The document deletion has been initiated in the background.
                 - status="busy": The pipeline is busy with another operation.
-                - status="not_allowed": Operation not allowed when LLM cache for entity extraction is disabled.
 
         Raises:
             HTTPException:
@@ -2366,15 +2376,6 @@
         """
         doc_ids = delete_request.doc_ids
 
-        # The rag object is initialized from the server startup args,
-        # so we can access its properties here.
-        if not rag.enable_llm_cache_for_entity_extract:
-            return DeleteDocByIdResponse(
-                status="not_allowed",
-                message="Operation not allowed when LLM cache for entity extraction is disabled.",
-                doc_id=", ".join(delete_request.doc_ids),
-            )
-
        try:
            from lightrag.kg.shared_storage import get_namespace_data
 
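
With the gate above removed, clients no longer see status="not_allowed". A small sketch of handling the remaining documented statuses; the function name is illustrative, and the status and message fields are the ones shown in this diff:

    def handle_delete_response(body: dict) -> None:
        # "not_allowed" can no longer be returned now that the gate above is gone
        if body.get("status") == "deletion_started":
            print("Deletion started in background:", body.get("message"))
        elif body.get("status") == "busy":
            print("Pipeline busy with another operation; retry later")
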
@@ -2395,6 +2396,7 @@
             doc_manager,
             doc_ids,
             delete_request.delete_file,
+            delete_request.delete_llm_cache,
         )
 
         return DeleteDocByIdResponse(

LightRAG core:

@@ -2793,7 +2793,9 @@ class LightRAG:
         # Return the dictionary containing statuses only for the found document IDs
         return found_statuses
 
-    async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult:
+    async def adelete_by_doc_id(
+        self, doc_id: str, delete_llm_cache: bool = False
+    ) -> DeletionResult:
         """Delete a document and all its related data, including chunks, graph elements.
 
         This method orchestrates a comprehensive deletion process for a given document ID.
@@ -2803,6 +2805,8 @@ class LightRAG:
 
         Args:
             doc_id (str): The unique identifier of the document to be deleted.
+            delete_llm_cache (bool): Whether to delete cached LLM extraction results
+                associated with the document. Defaults to False.
 
         Returns:
             DeletionResult: An object containing the outcome of the deletion process.
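
Calling the extended method directly from library code, as a minimal sketch; the helper and document ID are hypothetical, and rag is assumed to be an already-initialized LightRAG instance:

    import asyncio

    async def purge_document(rag, doc_id: str) -> None:
        # Forward delete_llm_cache=True so cached extraction results go too
        result = await rag.adelete_by_doc_id(doc_id, delete_llm_cache=True)
        print(result.status, result.doc_id)

    # usage (hypothetical): asyncio.run(purge_document(rag, "doc-1234"))
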
@@ -2814,6 +2818,7 @@ class LightRAG:
         """
         deletion_operations_started = False
         original_exception = None
+        doc_llm_cache_ids: list[str] = []
 
         # Get pipeline status shared data and lock for status updates
         pipeline_status = await get_namespace_data("pipeline_status")
@@ -2914,6 +2919,57 @@ class LightRAG:
             # Mark that deletion operations have started
             deletion_operations_started = True
 
+            if delete_llm_cache and chunk_ids:
+                if not self.llm_response_cache:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because cache storage is unavailable",
+                        doc_id,
+                    )
+                elif not self.text_chunks:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because text chunk storage is unavailable",
+                        doc_id,
+                    )
+                else:
+                    try:
+                        chunk_data_list = await self.text_chunks.get_by_ids(
+                            list(chunk_ids)
+                        )
+                        seen_cache_ids: set[str] = set()
+                        for chunk_data in chunk_data_list:
+                            if not chunk_data or not isinstance(chunk_data, dict):
+                                continue
+                            cache_ids = chunk_data.get("llm_cache_list", [])
+                            if not isinstance(cache_ids, list):
+                                continue
+                            for cache_id in cache_ids:
+                                if (
+                                    isinstance(cache_id, str)
+                                    and cache_id
+                                    and cache_id not in seen_cache_ids
+                                ):
+                                    doc_llm_cache_ids.append(cache_id)
+                                    seen_cache_ids.add(cache_id)
+                        if doc_llm_cache_ids:
+                            logger.info(
+                                "Collected %d LLM cache entries for document %s",
+                                len(doc_llm_cache_ids),
+                                doc_id,
+                            )
+                        else:
+                            logger.info(
+                                "No LLM cache entries found for document %s", doc_id
+                            )
+                    except Exception as cache_collect_error:
+                        logger.error(
+                            "Failed to collect LLM cache ids for document %s: %s",
+                            doc_id,
+                            cache_collect_error,
+                        )
+                        raise Exception(
+                            f"Failed to collect LLM cache ids for document {doc_id}: {cache_collect_error}"
+                        ) from cache_collect_error
+
             # 4. Analyze entities and relationships that will be affected
             entities_to_delete = set()
             entities_to_rebuild = {}  # entity_name -> remaining chunk id list
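
The collection loop above deduplicates cache IDs across chunks while preserving first-seen order. A self-contained sketch of that logic; the llm_cache_list key is the one read above, and the record values are made up for illustration:

    chunk_data_list = [
        {"content": "...", "llm_cache_list": ["cache-a", "cache-b"]},
        {"content": "...", "llm_cache_list": ["cache-b", "cache-c"]},  # "cache-b" repeats
        None,  # a missing chunk lookup is skipped
    ]

    doc_llm_cache_ids: list[str] = []
    seen_cache_ids: set[str] = set()
    for chunk_data in chunk_data_list:
        if not chunk_data or not isinstance(chunk_data, dict):
            continue
        for cache_id in chunk_data.get("llm_cache_list", []):
            if isinstance(cache_id, str) and cache_id and cache_id not in seen_cache_ids:
                doc_llm_cache_ids.append(cache_id)
                seen_cache_ids.add(cache_id)

    assert doc_llm_cache_ids == ["cache-a", "cache-b", "cache-c"]
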
@@ -3236,6 +3292,23 @@ class LightRAG:
             logger.error(f"Failed to delete document and status: {e}")
             raise Exception(f"Failed to delete document and status: {e}") from e
 
+        if delete_llm_cache and doc_llm_cache_ids and self.llm_response_cache:
+            try:
+                await self.llm_response_cache.delete(doc_llm_cache_ids)
+                cache_log_message = f"Successfully deleted {len(doc_llm_cache_ids)} LLM cache entries for document {doc_id}"
+                logger.info(cache_log_message)
+                async with pipeline_status_lock:
+                    pipeline_status["latest_message"] = cache_log_message
+                    pipeline_status["history_messages"].append(cache_log_message)
+                log_message = cache_log_message
+            except Exception as cache_delete_error:
+                log_message = f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}"
+                logger.error(log_message)
+                logger.error(traceback.format_exc())
+                async with pipeline_status_lock:
+                    pipeline_status["latest_message"] = log_message
+                    pipeline_status["history_messages"].append(log_message)
+
         return DeletionResult(
             status="success",
             doc_id=doc_id,

WebUI API client (TypeScript):

@@ -618,9 +618,13 @@ export const clearCache = async (): Promise<{
   return response.data
 }
 
-export const deleteDocuments = async (docIds: string[], deleteFile: boolean = false): Promise<DeleteDocResponse> => {
+export const deleteDocuments = async (
+  docIds: string[],
+  deleteFile: boolean = false,
+  deleteLLMCache: boolean = false
+): Promise<DeleteDocResponse> => {
   const response = await axiosInstance.delete('/documents/delete_document', {
-    data: { doc_ids: docIds, delete_file: deleteFile }
+    data: { doc_ids: docIds, delete_file: deleteFile, delete_llm_cache: deleteLLMCache }
   })
   return response.data
 }

WebUI DeleteDocumentsDialog component:

@@ -44,6 +44,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDeleted
   const [confirmText, setConfirmText] = useState('')
   const [deleteFile, setDeleteFile] = useState(false)
   const [isDeleting, setIsDeleting] = useState(false)
+  const [deleteLLMCache, setDeleteLLMCache] = useState(false)
   const isConfirmEnabled = confirmText.toLowerCase() === 'yes' && !isDeleting
 
   // Reset state when dialog closes
@@ -51,6 +52,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDeleted
     if (!open) {
       setConfirmText('')
       setDeleteFile(false)
+      setDeleteLLMCache(false)
       setIsDeleting(false)
     }
   }, [open])
@@ -60,7 +62,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDeleted
 
     setIsDeleting(true)
     try {
-      const result = await deleteDocuments(selectedDocIds, deleteFile)
+      const result = await deleteDocuments(selectedDocIds, deleteFile, deleteLLMCache)
 
       if (result.status === 'deletion_started') {
         toast.success(t('documentPanel.deleteDocuments.success', { count: selectedDocIds.length }))
@@ -94,7 +96,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDeleted
     } finally {
       setIsDeleting(false)
     }
-  }, [isConfirmEnabled, selectedDocIds, deleteFile, setOpen, t, onDocumentsDeleted])
+  }, [isConfirmEnabled, selectedDocIds, deleteFile, deleteLLMCache, setOpen, t, onDocumentsDeleted])
 
   return (
     <Dialog open={open} onOpenChange={setOpen}>
@@ -155,6 +157,20 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDeleted
               {t('documentPanel.deleteDocuments.deleteFileOption')}
             </Label>
           </div>
+
+          <div className="flex items-center space-x-2">
+            <input
+              type="checkbox"
+              id="delete-llm-cache"
+              checked={deleteLLMCache}
+              onChange={(e) => setDeleteLLMCache(e.target.checked)}
+              disabled={isDeleting}
+              className="h-4 w-4 text-red-600 focus:ring-red-500 border-gray-300 rounded"
+            />
+            <Label htmlFor="delete-llm-cache" className="text-sm font-medium cursor-pointer">
+              {t('documentPanel.deleteDocuments.deleteLLMCacheOption')}
+            </Label>
+          </div>
         </div>
 
         <DialogFooter>

Locale strings (Arabic):

@@ -70,6 +70,7 @@
     "confirmButton": "نعم",
     "deleteFileOption": "حذف الملفات المرفوعة أيضًا",
     "deleteFileTooltip": "حدد هذا الخيار لحذف الملفات المرفوعة المقابلة على الخادم أيضًا",
+    "deleteLLMCacheOption": "حذف ذاكرة LLM المؤقتة للاستخراج أيضًا",
     "success": "تم بدء تشغيل خط معالجة حذف المستندات بنجاح",
     "failed": "فشل حذف المستندات:\n{{message}}",
     "error": "فشل حذف المستندات:\n{{error}}",

Locale strings (English):

@@ -70,6 +70,7 @@
     "confirmButton": "YES",
     "deleteFileOption": "Also delete uploaded files",
     "deleteFileTooltip": "Check this option to also delete the corresponding uploaded files on the server",
+    "deleteLLMCacheOption": "Also delete extracted LLM cache",
     "success": "Document deletion pipeline started successfully",
     "failed": "Delete Documents Failed:\n{{message}}",
     "error": "Delete Documents Failed:\n{{error}}",

Locale strings (French):

@@ -70,6 +70,7 @@
     "confirmButton": "OUI",
     "deleteFileOption": "Supprimer également les fichiers téléchargés",
     "deleteFileTooltip": "Cochez cette option pour supprimer également les fichiers téléchargés correspondants sur le serveur",
+    "deleteLLMCacheOption": "Supprimer également le cache LLM d'extraction",
     "success": "Pipeline de suppression de documents démarré avec succès",
     "failed": "Échec de la suppression des documents :\n{{message}}",
     "error": "Échec de la suppression des documents :\n{{error}}",

Locale strings (Simplified Chinese):

@@ -70,6 +70,7 @@
     "confirmButton": "确定",
     "deleteFileOption": "同时删除上传文件",
     "deleteFileTooltip": "选中此选项将同时删除服务器上对应的上传文件",
+    "deleteLLMCacheOption": "同时删除实体关系抽取 LLM 缓存",
     "success": "文档删除流水线启动成功",
     "failed": "删除文档失败:\n{{message}}",
     "error": "删除文档失败:\n{{error}}",

Locale strings (Traditional Chinese):

@@ -70,6 +70,7 @@
     "confirmButton": "確定",
     "deleteFileOption": "同時刪除上傳檔案",
     "deleteFileTooltip": "選取此選項將同時刪除伺服器上對應的上傳檔案",
+    "deleteLLMCacheOption": "同時刪除實體關係擷取 LLM 快取",
     "success": "文件刪除流水線啟動成功",
     "failed": "刪除文件失敗:\n{{message}}",
     "error": "刪除文件失敗:\n{{error}}",