cherry-pick b76350a3
This commit is contained in:
parent
77a715f61b
commit
0002bb63db
1 changed files with 76 additions and 3 deletions
|
|
@ -887,13 +887,13 @@ class LightRAG:
|
||||||
need_entity_migration = await self.entity_chunks.is_empty()
|
need_entity_migration = await self.entity_chunks.is_empty()
|
||||||
except Exception as exc: # pragma: no cover - defensive logging
|
except Exception as exc: # pragma: no cover - defensive logging
|
||||||
logger.error(f"Failed to check entity chunks storage: {exc}")
|
logger.error(f"Failed to check entity chunks storage: {exc}")
|
||||||
need_entity_migration = True
|
raise exc
|
||||||
|
|
||||||
try:
|
try:
|
||||||
need_relation_migration = await self.relation_chunks.is_empty()
|
need_relation_migration = await self.relation_chunks.is_empty()
|
||||||
except Exception as exc: # pragma: no cover - defensive logging
|
except Exception as exc: # pragma: no cover - defensive logging
|
||||||
logger.error(f"Failed to check relation chunks storage: {exc}")
|
logger.error(f"Failed to check relation chunks storage: {exc}")
|
||||||
need_relation_migration = True
|
raise exc
|
||||||
|
|
||||||
if not need_entity_migration and not need_relation_migration:
|
if not need_entity_migration and not need_relation_migration:
|
||||||
return
|
return
|
||||||
|
|
@ -2793,7 +2793,9 @@ class LightRAG:
|
||||||
# Return the dictionary containing statuses only for the found document IDs
|
# Return the dictionary containing statuses only for the found document IDs
|
||||||
return found_statuses
|
return found_statuses
|
||||||
|
|
||||||
async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult:
|
async def adelete_by_doc_id(
|
||||||
|
self, doc_id: str, delete_llm_cache: bool = False
|
||||||
|
) -> DeletionResult:
|
||||||
"""Delete a document and all its related data, including chunks, graph elements.
|
"""Delete a document and all its related data, including chunks, graph elements.
|
||||||
|
|
||||||
This method orchestrates a comprehensive deletion process for a given document ID.
|
This method orchestrates a comprehensive deletion process for a given document ID.
|
||||||
|
|
@ -2803,6 +2805,8 @@ class LightRAG:
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
doc_id (str): The unique identifier of the document to be deleted.
|
doc_id (str): The unique identifier of the document to be deleted.
|
||||||
|
delete_llm_cache (bool): Whether to delete cached LLM extraction results
|
||||||
|
associated with the document. Defaults to False.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DeletionResult: An object containing the outcome of the deletion process.
|
DeletionResult: An object containing the outcome of the deletion process.
|
||||||
|
|
@ -2814,6 +2818,7 @@ class LightRAG:
|
||||||
"""
|
"""
|
||||||
deletion_operations_started = False
|
deletion_operations_started = False
|
||||||
original_exception = None
|
original_exception = None
|
||||||
|
doc_llm_cache_ids: list[str] = []
|
||||||
|
|
||||||
# Get pipeline status shared data and lock for status updates
|
# Get pipeline status shared data and lock for status updates
|
||||||
pipeline_status = await get_namespace_data("pipeline_status")
|
pipeline_status = await get_namespace_data("pipeline_status")
|
||||||
|
|
@ -2914,6 +2919,57 @@ class LightRAG:
|
||||||
# Mark that deletion operations have started
|
# Mark that deletion operations have started
|
||||||
deletion_operations_started = True
|
deletion_operations_started = True
|
||||||
|
|
||||||
|
if delete_llm_cache and chunk_ids:
|
||||||
|
if not self.llm_response_cache:
|
||||||
|
logger.info(
|
||||||
|
"Skipping LLM cache collection for document %s because cache storage is unavailable",
|
||||||
|
doc_id,
|
||||||
|
)
|
||||||
|
elif not self.text_chunks:
|
||||||
|
logger.info(
|
||||||
|
"Skipping LLM cache collection for document %s because text chunk storage is unavailable",
|
||||||
|
doc_id,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
chunk_data_list = await self.text_chunks.get_by_ids(
|
||||||
|
list(chunk_ids)
|
||||||
|
)
|
||||||
|
seen_cache_ids: set[str] = set()
|
||||||
|
for chunk_data in chunk_data_list:
|
||||||
|
if not chunk_data or not isinstance(chunk_data, dict):
|
||||||
|
continue
|
||||||
|
cache_ids = chunk_data.get("llm_cache_list", [])
|
||||||
|
if not isinstance(cache_ids, list):
|
||||||
|
continue
|
||||||
|
for cache_id in cache_ids:
|
||||||
|
if (
|
||||||
|
isinstance(cache_id, str)
|
||||||
|
and cache_id
|
||||||
|
and cache_id not in seen_cache_ids
|
||||||
|
):
|
||||||
|
doc_llm_cache_ids.append(cache_id)
|
||||||
|
seen_cache_ids.add(cache_id)
|
||||||
|
if doc_llm_cache_ids:
|
||||||
|
logger.info(
|
||||||
|
"Collected %d LLM cache entries for document %s",
|
||||||
|
len(doc_llm_cache_ids),
|
||||||
|
doc_id,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"No LLM cache entries found for document %s", doc_id
|
||||||
|
)
|
||||||
|
except Exception as cache_collect_error:
|
||||||
|
logger.error(
|
||||||
|
"Failed to collect LLM cache ids for document %s: %s",
|
||||||
|
doc_id,
|
||||||
|
cache_collect_error,
|
||||||
|
)
|
||||||
|
raise Exception(
|
||||||
|
f"Failed to collect LLM cache ids for document {doc_id}: {cache_collect_error}"
|
||||||
|
) from cache_collect_error
|
||||||
|
|
||||||
# 4. Analyze entities and relationships that will be affected
|
# 4. Analyze entities and relationships that will be affected
|
||||||
entities_to_delete = set()
|
entities_to_delete = set()
|
||||||
entities_to_rebuild = {} # entity_name -> remaining chunk id list
|
entities_to_rebuild = {} # entity_name -> remaining chunk id list
|
||||||
|
|
@ -3236,6 +3292,23 @@ class LightRAG:
|
||||||
logger.error(f"Failed to delete document and status: {e}")
|
logger.error(f"Failed to delete document and status: {e}")
|
||||||
raise Exception(f"Failed to delete document and status: {e}") from e
|
raise Exception(f"Failed to delete document and status: {e}") from e
|
||||||
|
|
||||||
|
if delete_llm_cache and doc_llm_cache_ids and self.llm_response_cache:
|
||||||
|
try:
|
||||||
|
await self.llm_response_cache.delete(doc_llm_cache_ids)
|
||||||
|
cache_log_message = f"Successfully deleted {len(doc_llm_cache_ids)} LLM cache entries for document {doc_id}"
|
||||||
|
logger.info(cache_log_message)
|
||||||
|
async with pipeline_status_lock:
|
||||||
|
pipeline_status["latest_message"] = cache_log_message
|
||||||
|
pipeline_status["history_messages"].append(cache_log_message)
|
||||||
|
log_message = cache_log_message
|
||||||
|
except Exception as cache_delete_error:
|
||||||
|
log_message = f"Failed to delete LLM cache for document {doc_id}: {cache_delete_error}"
|
||||||
|
logger.error(log_message)
|
||||||
|
logger.error(traceback.format_exc())
|
||||||
|
async with pipeline_status_lock:
|
||||||
|
pipeline_status["latest_message"] = log_message
|
||||||
|
pipeline_status["history_messages"].append(log_message)
|
||||||
|
|
||||||
return DeletionResult(
|
return DeletionResult(
|
||||||
status="success",
|
status="success",
|
||||||
doc_id=doc_id,
|
doc_id=doc_id,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue