Add optional LLM cache deletion when deleting documents
• Add delete_llm_cache parameter to the API
• Collect cache IDs from text chunks
• Delete the cache after graph operations
• Update the UI with a new checkbox option
• Add i18n translations for the cache option
Parent: 907204714b
Commit: 162370b6e6
9 changed files with 121 additions and 19 deletions
@@ -336,6 +336,10 @@ class DeleteDocRequest(BaseModel):
         default=False,
         description="Whether to delete the corresponding file in the upload directory.",
     )
+    delete_llm_cache: bool = Field(
+        default=False,
+        description="Whether to delete cached LLM extraction results for the documents.",
+    )

     @field_validator("doc_ids", mode="after")
     @classmethod
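For reference, a minimal sketch of a client request that exercises the new field. The base URL, port, and the sample document ID are placeholder assumptions; the field names come from DeleteDocRequest above and the endpoint path from the WebUI client later in this diff.

import requests

BASE_URL = "http://localhost:9621"  # placeholder address of a running LightRAG server

payload = {
    "doc_ids": ["doc-0123456789abcdef"],  # placeholder document ID
    "delete_file": False,                 # keep the uploaded file on the server
    "delete_llm_cache": True,             # new: also drop cached LLM extraction results
}

resp = requests.delete(f"{BASE_URL}/documents/delete_document", json=payload, timeout=30)
resp.raise_for_status()
print(resp.json())  # e.g. {"status": "deletion_started", ...}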
@@ -1487,6 +1491,7 @@ async def background_delete_documents(
     doc_manager: DocumentManager,
     doc_ids: List[str],
     delete_file: bool = False,
+    delete_llm_cache: bool = False,
 ):
     """Background task to delete multiple documents"""
     from lightrag.kg.shared_storage import (
@@ -1521,6 +1526,10 @@ async def background_delete_documents(
         )
         # Use slice assignment to clear the list in place
         pipeline_status["history_messages"][:] = ["Starting document deletion process"]
+        if delete_llm_cache:
+            pipeline_status["history_messages"].append(
+                "LLM cache cleanup requested for this deletion job"
+            )

     try:
         # Loop through each document ID and delete them one by one
@@ -1534,7 +1543,9 @@ async def background_delete_documents(

             file_path = "#"
             try:
-                result = await rag.adelete_by_doc_id(doc_id)
+                result = await rag.adelete_by_doc_id(
+                    doc_id, delete_llm_cache=delete_llm_cache
+                )
                 file_path = (
                     getattr(result, "file_path", "-") if "result" in locals() else "-"
                 )
@@ -2344,21 +2355,20 @@ def create_document_routes(
         Delete documents and all their associated data by their IDs using background processing.

         Deletes specific documents and all their associated data, including their status,
-        text chunks, vector embeddings, and any related graph data.
+        text chunks, vector embeddings, and any related graph data. When requested,
+        cached LLM extraction responses are removed after graph deletion/rebuild completes.
         The deletion process runs in the background to avoid blocking the client connection.
-        It is disabled when llm cache for entity extraction is disabled.

         This operation is irreversible and will interact with the pipeline status.

         Args:
-            delete_request (DeleteDocRequest): The request containing the document IDs and delete_file options.
+            delete_request (DeleteDocRequest): The request containing the document IDs and deletion options.
             background_tasks: FastAPI BackgroundTasks for async processing

         Returns:
             DeleteDocByIdResponse: The result of the deletion operation.
                 - status="deletion_started": The document deletion has been initiated in the background.
                 - status="busy": The pipeline is busy with another operation.
-                - status="not_allowed": Operation not allowed when LLM cache for entity extraction is disabled.

         Raises:
             HTTPException:
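With the not_allowed branch removed (see the next hunk), clients only need to handle deletion_started and busy. A hedged sketch of a retry loop on the busy status; the URL and the timing values are assumptions:

import time

import requests

BASE_URL = "http://localhost:9621"  # placeholder deployment address


def delete_documents(doc_ids, delete_llm_cache=True, attempts=5):
    """Request deletion, retrying while the pipeline reports it is busy."""
    payload = {
        "doc_ids": doc_ids,
        "delete_file": False,
        "delete_llm_cache": delete_llm_cache,
    }
    for _ in range(attempts):
        resp = requests.delete(
            f"{BASE_URL}/documents/delete_document", json=payload, timeout=30
        )
        resp.raise_for_status()
        body = resp.json()
        if body.get("status") == "busy":
            time.sleep(5)  # another pipeline job is running; wait and retry
            continue
        return body  # "deletion_started" (queued) or any other terminal status
    raise RuntimeError("Deletion pipeline stayed busy; giving up")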
@@ -2366,15 +2376,6 @@ def create_document_routes(
         """
         doc_ids = delete_request.doc_ids

-        # The rag object is initialized from the server startup args,
-        # so we can access its properties here.
-        if not rag.enable_llm_cache_for_entity_extract:
-            return DeleteDocByIdResponse(
-                status="not_allowed",
-                message="Operation not allowed when LLM cache for entity extraction is disabled.",
-                doc_id=", ".join(delete_request.doc_ids),
-            )
-
         try:
             from lightrag.kg.shared_storage import get_namespace_data

@@ -2395,6 +2396,7 @@ def create_document_routes(
                 doc_manager,
                 doc_ids,
                 delete_request.delete_file,
+                delete_request.delete_llm_cache,
             )

             return DeleteDocByIdResponse(
@@ -2793,7 +2793,9 @@ class LightRAG:
         # Return the dictionary containing statuses only for the found document IDs
         return found_statuses

-    async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult:
+    async def adelete_by_doc_id(
+        self, doc_id: str, delete_llm_cache: bool = False
+    ) -> DeletionResult:
         """Delete a document and all its related data, including chunks, graph elements.

         This method orchestrates a comprehensive deletion process for a given document ID.
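The same behaviour is reachable directly from the library; a minimal sketch assuming an already-initialized LightRAG instance named rag (construction and storage setup omitted) and a placeholder document ID:

import asyncio


async def purge_document(rag, doc_id: str) -> None:
    # delete_llm_cache=True also removes the cached LLM extraction responses
    # collected from the document's chunks (see the collection logic below).
    result = await rag.adelete_by_doc_id(doc_id, delete_llm_cache=True)
    print(result.status, result.doc_id)


# asyncio.run(purge_document(rag, "doc-0123456789abcdef"))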
@@ -2803,6 +2805,8 @@ class LightRAG:

         Args:
             doc_id (str): The unique identifier of the document to be deleted.
+            delete_llm_cache (bool): Whether to delete cached LLM extraction results
+                associated with the document. Defaults to False.

         Returns:
             DeletionResult: An object containing the outcome of the deletion process.
@@ -2814,6 +2818,7 @@ class LightRAG:
         """
         deletion_operations_started = False
         original_exception = None
+        doc_llm_cache_ids: list[str] = []

         # Get pipeline status shared data and lock for status updates
         pipeline_status = await get_namespace_data("pipeline_status")
@@ -2914,6 +2919,57 @@ class LightRAG:
             # Mark that deletion operations have started
             deletion_operations_started = True

+            if delete_llm_cache and chunk_ids:
+                if not self.llm_response_cache:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because cache storage is unavailable",
+                        doc_id,
+                    )
+                elif not self.text_chunks:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because text chunk storage is unavailable",
+                        doc_id,
+                    )
+                else:
+                    try:
+                        chunk_data_list = await self.text_chunks.get_by_ids(
+                            list(chunk_ids)
+                        )
+                        seen_cache_ids: set[str] = set()
+                        for chunk_data in chunk_data_list:
+                            if not chunk_data or not isinstance(chunk_data, dict):
+                                continue
+                            cache_ids = chunk_data.get("llm_cache_list", [])
+                            if not isinstance(cache_ids, list):
+                                continue
+                            for cache_id in cache_ids:
+                                if (
+                                    isinstance(cache_id, str)
+                                    and cache_id
+                                    and cache_id not in seen_cache_ids
+                                ):
+                                    doc_llm_cache_ids.append(cache_id)
+                                    seen_cache_ids.add(cache_id)
+                        if doc_llm_cache_ids:
+                            logger.info(
+                                "Collected %d LLM cache entries for document %s",
+                                len(doc_llm_cache_ids),
+                                doc_id,
+                            )
+                        else:
+                            logger.info(
+                                "No LLM cache entries found for document %s", doc_id
+                            )
+                    except Exception as cache_collect_error:
+                        logger.error(
+                            "Failed to collect LLM cache ids for document %s: %s",
+                            doc_id,
+                            cache_collect_error,
+                        )
+                        raise Exception(
+                            f"Failed to collect LLM cache ids for document {doc_id}: {cache_collect_error}"
+                        ) from cache_collect_error
+
             # 4. Analyze entities and relationships that will be affected
             entities_to_delete = set()
             entities_to_rebuild = {}  # entity_name -> remaining chunk id list
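The collection step above amounts to a first-seen deduplication over each chunk's llm_cache_list. An isolated sketch of that pattern; the sample chunk records are fabricated for illustration:

def collect_cache_ids(chunk_records):
    """Gather unique, non-empty cache IDs, preserving first-seen order."""
    seen = set()
    collected = []
    for record in chunk_records:
        if not record or not isinstance(record, dict):
            continue  # chunk missing or malformed
        cache_ids = record.get("llm_cache_list", [])
        if not isinstance(cache_ids, list):
            continue
        for cache_id in cache_ids:
            if isinstance(cache_id, str) and cache_id and cache_id not in seen:
                seen.add(cache_id)
                collected.append(cache_id)
    return collected


# Fabricated example records:
chunks = [
    {"llm_cache_list": ["cache-a", "cache-b"]},
    {"llm_cache_list": ["cache-b", "cache-c"]},
    None,  # a chunk that could not be loaded
]
assert collect_cache_ids(chunks) == ["cache-a", "cache-b", "cache-c"]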
@@ -3236,6 +3292,25 @@ class LightRAG:
                 logger.error(f"Failed to delete document and status: {e}")
                 raise Exception(f"Failed to delete document and status: {e}") from e

+            if delete_llm_cache and doc_llm_cache_ids and self.llm_response_cache:
+                try:
+                    await self.llm_response_cache.delete(doc_llm_cache_ids)
+                    cache_log_message = f"Successfully deleted {len(doc_llm_cache_ids)} LLM cache entries for document {doc_id}"
+                    logger.info(cache_log_message)
+                    async with pipeline_status_lock:
+                        pipeline_status["latest_message"] = cache_log_message
+                        pipeline_status["history_messages"].append(cache_log_message)
+                    log_message = cache_log_message
+                except Exception as cache_delete_error:
+                    logger.error(
+                        "Failed to delete LLM cache for document %s: %s",
+                        doc_id,
+                        cache_delete_error,
+                    )
+                    raise Exception(
+                        f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}"
+                    ) from cache_delete_error
+
             return DeletionResult(
                 status="success",
                 doc_id=doc_id,
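Because this cleanup runs only after the document's chunks, graph data, and status records have been removed, a failure here means the document itself is already gone and only cache entries remain. A hedged sketch of how a caller might surface that distinction; matching on the exception text is an assumption for illustration:

async def delete_doc_and_cache(rag, doc_id: str):
    """Returns (cache_cleaned, result_or_error_message)."""
    try:
        result = await rag.adelete_by_doc_id(doc_id, delete_llm_cache=True)
        return True, result
    except Exception as exc:
        if "Failed to delete LLM cache" in str(exc):
            # Document data was removed; only the cached LLM responses were left
            # behind and can be cleared later through the cache-clearing API.
            return False, str(exc)
        raise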
@@ -618,9 +618,13 @@ export const clearCache = async (): Promise<{
   return response.data
 }

-export const deleteDocuments = async (docIds: string[], deleteFile: boolean = false): Promise<DeleteDocResponse> => {
+export const deleteDocuments = async (
+  docIds: string[],
+  deleteFile: boolean = false,
+  deleteLLMCache: boolean = false
+): Promise<DeleteDocResponse> => {
   const response = await axiosInstance.delete('/documents/delete_document', {
-    data: { doc_ids: docIds, delete_file: deleteFile }
+    data: { doc_ids: docIds, delete_file: deleteFile, delete_llm_cache: deleteLLMCache }
   })
   return response.data
 }
@@ -44,6 +44,7 @@ export default function DeleteDocumentsDialog({ selectedDocIds, onDocumentsDelet
   const [confirmText, setConfirmText] = useState('')
   const [deleteFile, setDeleteFile] = useState(false)
   const [isDeleting, setIsDeleting] = useState(false)
+  const [deleteLLMCache, setDeleteLLMCache] = useState(false)
   const isConfirmEnabled = confirmText.toLowerCase() === 'yes' && !isDeleting

   // Reset state when dialog closes
@@ -51,6 +52,7 @@
     if (!open) {
       setConfirmText('')
       setDeleteFile(false)
+      setDeleteLLMCache(false)
       setIsDeleting(false)
     }
   }, [open])
@@ -60,7 +62,7 @@

     setIsDeleting(true)
     try {
-      const result = await deleteDocuments(selectedDocIds, deleteFile)
+      const result = await deleteDocuments(selectedDocIds, deleteFile, deleteLLMCache)

       if (result.status === 'deletion_started') {
         toast.success(t('documentPanel.deleteDocuments.success', { count: selectedDocIds.length }))
@@ -94,7 +96,7 @@
     } finally {
       setIsDeleting(false)
     }
-  }, [isConfirmEnabled, selectedDocIds, deleteFile, setOpen, t, onDocumentsDeleted])
+  }, [isConfirmEnabled, selectedDocIds, deleteFile, deleteLLMCache, setOpen, t, onDocumentsDeleted])

   return (
     <Dialog open={open} onOpenChange={setOpen}>
@@ -155,6 +157,20 @@
               {t('documentPanel.deleteDocuments.deleteFileOption')}
             </Label>
           </div>
+
+          <div className="flex items-center space-x-2">
+            <input
+              type="checkbox"
+              id="delete-llm-cache"
+              checked={deleteLLMCache}
+              onChange={(e) => setDeleteLLMCache(e.target.checked)}
+              disabled={isDeleting}
+              className="h-4 w-4 text-red-600 focus:ring-red-500 border-gray-300 rounded"
+            />
+            <Label htmlFor="delete-llm-cache" className="text-sm font-medium cursor-pointer">
+              {t('documentPanel.deleteDocuments.deleteLLMCacheOption')}
+            </Label>
+          </div>
         </div>

         <DialogFooter>
The new deleteLLMCacheOption key is added to each locale file:

Arabic:
@@ -70,6 +70,7 @@
     "confirmButton": "نعم",
     "deleteFileOption": "حذف الملفات المرفوعة أيضًا",
     "deleteFileTooltip": "حدد هذا الخيار لحذف الملفات المرفوعة المقابلة على الخادم أيضًا",
+    "deleteLLMCacheOption": "حذف ذاكرة LLM المؤقتة للاستخراج أيضًا",
     "success": "تم بدء تشغيل خط معالجة حذف المستندات بنجاح",
     "failed": "فشل حذف المستندات:\n{{message}}",
     "error": "فشل حذف المستندات:\n{{error}}",

English:
@@ -70,6 +70,7 @@
     "confirmButton": "YES",
     "deleteFileOption": "Also delete uploaded files",
     "deleteFileTooltip": "Check this option to also delete the corresponding uploaded files on the server",
+    "deleteLLMCacheOption": "Also delete extracted LLM cache",
     "success": "Document deletion pipeline started successfully",
     "failed": "Delete Documents Failed:\n{{message}}",
     "error": "Delete Documents Failed:\n{{error}}",

French:
@@ -70,6 +70,7 @@
     "confirmButton": "OUI",
     "deleteFileOption": "Supprimer également les fichiers téléchargés",
     "deleteFileTooltip": "Cochez cette option pour supprimer également les fichiers téléchargés correspondants sur le serveur",
+    "deleteLLMCacheOption": "Supprimer également le cache LLM d'extraction",
     "success": "Pipeline de suppression de documents démarré avec succès",
     "failed": "Échec de la suppression des documents :\n{{message}}",
     "error": "Échec de la suppression des documents :\n{{error}}",

Chinese (Simplified):
@@ -70,6 +70,7 @@
     "confirmButton": "确定",
     "deleteFileOption": "同时删除上传文件",
     "deleteFileTooltip": "选中此选项将同时删除服务器上对应的上传文件",
+    "deleteLLMCacheOption": "同时删除实体关系抽取 LLM 缓存",
     "success": "文档删除流水线启动成功",
     "failed": "删除文档失败:\n{{message}}",
     "error": "删除文档失败:\n{{error}}",

Chinese (Traditional):
@@ -70,6 +70,7 @@
     "confirmButton": "確定",
     "deleteFileOption": "同時刪除上傳檔案",
     "deleteFileTooltip": "選取此選項將同時刪除伺服器上對應的上傳檔案",
+    "deleteLLMCacheOption": "同時刪除實體關係擷取 LLM 快取",
     "success": "文件刪除流水線啟動成功",
     "failed": "刪除文件失敗:\n{{message}}",
     "error": "刪除文件失敗:\n{{error}}",