cherry-pick b76350a3

parent 77a715f61b
commit 0002bb63db

1 changed file with 76 additions and 3 deletions
@@ -887,13 +887,13 @@ class LightRAG:
             need_entity_migration = await self.entity_chunks.is_empty()
         except Exception as exc:  # pragma: no cover - defensive logging
             logger.error(f"Failed to check entity chunks storage: {exc}")
-            need_entity_migration = True
+            raise exc

         try:
             need_relation_migration = await self.relation_chunks.is_empty()
         except Exception as exc:  # pragma: no cover - defensive logging
             logger.error(f"Failed to check relation chunks storage: {exc}")
-            need_relation_migration = True
+            raise exc

         if not need_entity_migration and not need_relation_migration:
             return
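Note on the hunk above: the migration probes switch from fail-open to fail-fast. Previously a storage error was swallowed and migration was assumed to be needed; now the error is logged and re-raised. A minimal sketch of the resulting shape, with a hypothetical `storage` object standing in for `self.entity_chunks` / `self.relation_chunks`:

import logging

logger = logging.getLogger(__name__)

async def probe_needs_migration(storage) -> bool:
    """Fail fast: abort startup instead of guessing when the storage probe errors."""
    try:
        return await storage.is_empty()
    except Exception as exc:  # pragma: no cover - defensive logging
        logger.error(f"Failed to check chunks storage: {exc}")
        raise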
@@ -2793,7 +2793,9 @@ class LightRAG:
         # Return the dictionary containing statuses only for the found document IDs
         return found_statuses

-    async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult:
+    async def adelete_by_doc_id(
+        self, doc_id: str, delete_llm_cache: bool = False
+    ) -> DeletionResult:
         """Delete a document and all its related data, including chunks, graph elements.

         This method orchestrates a comprehensive deletion process for a given document ID.
@@ -2803,6 +2805,8 @@ class LightRAG:

         Args:
             doc_id (str): The unique identifier of the document to be deleted.
+            delete_llm_cache (bool): Whether to delete cached LLM extraction results
+                associated with the document. Defaults to False.

         Returns:
             DeletionResult: An object containing the outcome of the deletion process.
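For reference, a minimal usage sketch of the extended signature. The `rag` instance and document ID are hypothetical placeholders; `DeletionResult.status` and `.doc_id` are the fields visible in this diff:

import asyncio

from lightrag import LightRAG  # package import path assumed

async def purge_document(rag: LightRAG, doc_id: str) -> None:
    # Opt in to dropping cached LLM extraction results along with the
    # document's chunks and graph elements (the default remains False).
    result = await rag.adelete_by_doc_id(doc_id, delete_llm_cache=True)
    print(result.status, result.doc_id)

# asyncio.run(purge_document(rag, "doc-123"))  # "doc-123" is a placeholder ID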
@@ -2814,6 +2818,7 @@ class LightRAG:
         """
         deletion_operations_started = False
         original_exception = None
+        doc_llm_cache_ids: list[str] = []

         # Get pipeline status shared data and lock for status updates
         pipeline_status = await get_namespace_data("pipeline_status")
@@ -2914,6 +2919,57 @@ class LightRAG:
             # Mark that deletion operations have started
             deletion_operations_started = True

+            if delete_llm_cache and chunk_ids:
+                if not self.llm_response_cache:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because cache storage is unavailable",
+                        doc_id,
+                    )
+                elif not self.text_chunks:
+                    logger.info(
+                        "Skipping LLM cache collection for document %s because text chunk storage is unavailable",
+                        doc_id,
+                    )
+                else:
+                    try:
+                        chunk_data_list = await self.text_chunks.get_by_ids(
+                            list(chunk_ids)
+                        )
+                        seen_cache_ids: set[str] = set()
+                        for chunk_data in chunk_data_list:
+                            if not chunk_data or not isinstance(chunk_data, dict):
+                                continue
+                            cache_ids = chunk_data.get("llm_cache_list", [])
+                            if not isinstance(cache_ids, list):
+                                continue
+                            for cache_id in cache_ids:
+                                if (
+                                    isinstance(cache_id, str)
+                                    and cache_id
+                                    and cache_id not in seen_cache_ids
+                                ):
+                                    doc_llm_cache_ids.append(cache_id)
+                                    seen_cache_ids.add(cache_id)
+                        if doc_llm_cache_ids:
+                            logger.info(
+                                "Collected %d LLM cache entries for document %s",
+                                len(doc_llm_cache_ids),
+                                doc_id,
+                            )
+                        else:
+                            logger.info(
+                                "No LLM cache entries found for document %s", doc_id
+                            )
+                    except Exception as cache_collect_error:
+                        logger.error(
+                            "Failed to collect LLM cache ids for document %s: %s",
+                            doc_id,
+                            cache_collect_error,
+                        )
+                        raise Exception(
+                            f"Failed to collect LLM cache ids for document {doc_id}: {cache_collect_error}"
+                        ) from cache_collect_error
+
             # 4. Analyze entities and relationships that will be affected
             entities_to_delete = set()
             entities_to_rebuild = {}  # entity_name -> remaining chunk id list
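The collection loop above is an order-preserving dedup over each chunk's `llm_cache_list`. The same pattern as a standalone, runnable sketch (function name hypothetical):

def collect_cache_ids(chunk_records: list) -> list[str]:
    """Gather unique, non-empty llm_cache_list entries in first-seen order."""
    collected: list[str] = []
    seen: set[str] = set()
    for record in chunk_records:
        if not record or not isinstance(record, dict):
            continue  # tolerate missing or malformed chunk records, as the diff does
        cache_ids = record.get("llm_cache_list", [])
        if not isinstance(cache_ids, list):
            continue
        for cache_id in cache_ids:
            if isinstance(cache_id, str) and cache_id and cache_id not in seen:
                collected.append(cache_id)
                seen.add(cache_id)
    return collected

# Duplicates, None records, and non-list fields are all skipped:
print(collect_cache_ids([{"llm_cache_list": ["a", "b", "a"]}, None, {"llm_cache_list": "bad"}]))
# -> ['a', 'b']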
@@ -3236,6 +3292,23 @@ class LightRAG:
             logger.error(f"Failed to delete document and status: {e}")
             raise Exception(f"Failed to delete document and status: {e}") from e

+        if delete_llm_cache and doc_llm_cache_ids and self.llm_response_cache:
+            try:
+                await self.llm_response_cache.delete(doc_llm_cache_ids)
+                cache_log_message = f"Successfully deleted {len(doc_llm_cache_ids)} LLM cache entries for document {doc_id}"
+                logger.info(cache_log_message)
+                async with pipeline_status_lock:
+                    pipeline_status["latest_message"] = cache_log_message
+                    pipeline_status["history_messages"].append(cache_log_message)
+                log_message = cache_log_message
+            except Exception as cache_delete_error:
+                log_message = f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}"
+                logger.error(log_message)
+                logger.error(traceback.format_exc())
+                async with pipeline_status_lock:
+                    pipeline_status["latest_message"] = log_message
+                    pipeline_status["history_messages"].append(log_message)
+
         return DeletionResult(
             status="success",
             doc_id=doc_id,
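Design note on the last hunk: cache cleanup is best-effort. A failure is logged with its traceback and surfaced through pipeline status, but it is not re-raised, so the method still returns a success DeletionResult — the document itself is already gone at that point. A generic sketch of the pattern (helper name hypothetical):

import logging
import traceback

logger = logging.getLogger(__name__)

async def best_effort_cleanup(cleanup_coro, description: str) -> str:
    """Run a secondary cleanup step; log failures instead of masking a completed deletion."""
    try:
        await cleanup_coro
        message = f"Successfully completed {description}"
        logger.info(message)
    except Exception as exc:
        message = f"Failed {description}: {exc}"
        logger.error(message)
        logger.error(traceback.format_exc())  # keep the stack trace, but do not re-raise
    return message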