Change document deletion API to async
This commit is contained in:
parent
8a365533d7
commit
49baeb7318
1 changed files with 89 additions and 56 deletions
|
|
@ -782,9 +782,66 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
|
||||||
logger.error(traceback.format_exc())
|
logger.error(traceback.format_exc())
|
||||||
|
|
||||||
|
|
||||||
|
async def background_delete_document(rag: LightRAG, doc_id: str):
|
||||||
|
"""Background task to delete a document"""
|
||||||
|
from lightrag.kg.shared_storage import (
|
||||||
|
get_namespace_data,
|
||||||
|
get_pipeline_status_lock,
|
||||||
|
)
|
||||||
|
|
||||||
|
pipeline_status = await get_namespace_data("pipeline_status")
|
||||||
|
pipeline_status_lock = get_pipeline_status_lock()
|
||||||
|
|
||||||
|
# Set pipeline status to busy for deletion
|
||||||
|
async with pipeline_status_lock:
|
||||||
|
pipeline_status.update(
|
||||||
|
{
|
||||||
|
"busy": True,
|
||||||
|
"job_name": f"Deleting Document: {doc_id}",
|
||||||
|
"job_start": datetime.now().isoformat(),
|
||||||
|
"latest_message": "Starting document deletion process",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# Use slice assignment to clear the list in place
|
||||||
|
pipeline_status["history_messages"][:] = [
|
||||||
|
f"Starting deletion for doc_id: {doc_id}"
|
||||||
|
]
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = await rag.adelete_by_doc_id(doc_id)
|
||||||
|
if "history_messages" in pipeline_status:
|
||||||
|
pipeline_status["history_messages"].append(result.message)
|
||||||
|
|
||||||
|
logger.info(f"Document deletion completed for {doc_id}: {result.status}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"Error deleting document {doc_id}: {str(e)}"
|
||||||
|
logger.error(error_msg)
|
||||||
|
logger.error(traceback.format_exc())
|
||||||
|
if "history_messages" in pipeline_status:
|
||||||
|
pipeline_status["history_messages"].append(error_msg)
|
||||||
|
finally:
|
||||||
|
async with pipeline_status_lock:
|
||||||
|
pipeline_status["busy"] = False
|
||||||
|
completion_msg = f"Document deletion process for {doc_id} completed."
|
||||||
|
pipeline_status["latest_message"] = completion_msg
|
||||||
|
if "history_messages" in pipeline_status:
|
||||||
|
pipeline_status["history_messages"].append(completion_msg)
|
||||||
|
|
||||||
|
|
||||||
def create_document_routes(
|
def create_document_routes(
|
||||||
rag: LightRAG, doc_manager: DocumentManager, api_key: Optional[str] = None
|
rag: LightRAG, doc_manager: DocumentManager, api_key: Optional[str] = None
|
||||||
):
|
):
|
||||||
|
# Check if doc_id exists in the system - add this check at the beginning
|
||||||
|
async def check_doc_id_exists(doc_id: str) -> bool:
|
||||||
|
"""Check if document ID exists in the system"""
|
||||||
|
try:
|
||||||
|
doc_status = await rag.doc_status.get_by_id(doc_id)
|
||||||
|
return doc_status is not None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error checking doc_id existence: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
# Create combined auth dependency for document routes
|
# Create combined auth dependency for document routes
|
||||||
combined_auth = get_combined_auth_dependency(api_key)
|
combined_auth = get_combined_auth_dependency(api_key)
|
||||||
|
|
||||||
|
|
@ -1254,13 +1311,11 @@ def create_document_routes(
|
||||||
class DeleteDocByIdResponse(BaseModel):
|
class DeleteDocByIdResponse(BaseModel):
|
||||||
"""Response model for single document deletion operation."""
|
"""Response model for single document deletion operation."""
|
||||||
|
|
||||||
status: Literal["success", "fail", "not_found", "busy"] = Field(
|
status: Literal["deletion_started", "busy", "not_allowed"] = Field(
|
||||||
description="Status of the deletion operation"
|
description="Status of the deletion operation"
|
||||||
)
|
)
|
||||||
message: str = Field(description="Message describing the operation result")
|
message: str = Field(description="Message describing the operation result")
|
||||||
doc_id: Optional[str] = Field(
|
doc_id: str = Field(description="The ID of the document to delete")
|
||||||
default=None, description="The ID of the document."
|
|
||||||
)
|
|
||||||
|
|
||||||
@router.delete(
|
@router.delete(
|
||||||
"/delete_document",
|
"/delete_document",
|
||||||
|
|
@ -1269,100 +1324,78 @@ def create_document_routes(
|
||||||
summary="Delete a document and all its associated data by its ID.",
|
summary="Delete a document and all its associated data by its ID.",
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO This method needs to be modified to be asynchronous (please do not use)
|
|
||||||
async def delete_document(
|
async def delete_document(
|
||||||
delete_request: DeleteDocRequest,
|
delete_request: DeleteDocRequest,
|
||||||
|
background_tasks: BackgroundTasks,
|
||||||
) -> DeleteDocByIdResponse:
|
) -> DeleteDocByIdResponse:
|
||||||
"""
|
"""
|
||||||
This method needs to be modified to be asynchronous (please do not use)
|
Delete a document and all its associated data by its ID using background processing.
|
||||||
|
|
||||||
Deletes a specific document and all its associated data, including its status,
|
Deletes a specific document and all its associated data, including its status,
|
||||||
text chunks, vector embeddings, and any related graph data.
|
text chunks, vector embeddings, and any related graph data.
|
||||||
|
The deletion process runs in the background to avoid blocking the client connection.
|
||||||
It is disabled when llm cache for entity extraction is disabled.
|
It is disabled when llm cache for entity extraction is disabled.
|
||||||
|
|
||||||
This operation is irreversible and will interact with the pipeline status.
|
This operation is irreversible and will interact with the pipeline status.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
delete_request (DeleteDocRequest): The request containing the document ID.
|
delete_request (DeleteDocRequest): The request containing the document ID.
|
||||||
|
background_tasks: FastAPI BackgroundTasks for async processing
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DeleteDocByIdResponse: The result of the deletion operation.
|
DeleteDocByIdResponse: The result of the deletion operation.
|
||||||
- status="success": The document was successfully deleted.
|
- status="deletion_started": The document deletion has been initiated in the background.
|
||||||
- status="not_found": The document with the specified ID was not found.
|
|
||||||
- status="fail": The deletion operation failed.
|
|
||||||
- status="busy": The pipeline is busy with another operation.
|
- status="busy": The pipeline is busy with another operation.
|
||||||
|
- status="not_allowed": Operation not allowed when LLM cache for entity extraction is disabled.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
HTTPException:
|
HTTPException:
|
||||||
- 500: If an unexpected internal error occurs.
|
- 500: If an unexpected internal error occurs during initialization.
|
||||||
"""
|
"""
|
||||||
|
# Check if doc_id exists first - return error immediately if not found
|
||||||
|
doc_id = delete_request.doc_id
|
||||||
|
if not await check_doc_id_exists(doc_id):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Document {doc_id} not found."
|
||||||
|
)
|
||||||
|
|
||||||
# The rag object is initialized from the server startup args,
|
# The rag object is initialized from the server startup args,
|
||||||
# so we can access its properties here.
|
# so we can access its properties here.
|
||||||
if not rag.enable_llm_cache_for_entity_extract:
|
if not rag.enable_llm_cache_for_entity_extract:
|
||||||
raise HTTPException(
|
return DeleteDocByIdResponse(
|
||||||
status_code=403,
|
status="not_allowed",
|
||||||
detail="Operation not allowed when LLM cache for entity extraction is disabled.",
|
message="Operation not allowed when LLM cache for entity extraction is disabled.",
|
||||||
|
doc_id=delete_request.doc_id,
|
||||||
)
|
)
|
||||||
from lightrag.kg.shared_storage import (
|
|
||||||
get_namespace_data,
|
|
||||||
get_pipeline_status_lock,
|
|
||||||
)
|
|
||||||
|
|
||||||
doc_id = delete_request.doc_id
|
try:
|
||||||
pipeline_status = await get_namespace_data("pipeline_status")
|
from lightrag.kg.shared_storage import get_namespace_data
|
||||||
pipeline_status_lock = get_pipeline_status_lock()
|
|
||||||
|
|
||||||
async with pipeline_status_lock:
|
pipeline_status = await get_namespace_data("pipeline_status")
|
||||||
|
|
||||||
|
# Check if pipeline is busy
|
||||||
if pipeline_status.get("busy", False):
|
if pipeline_status.get("busy", False):
|
||||||
return DeleteDocByIdResponse(
|
return DeleteDocByIdResponse(
|
||||||
status="busy",
|
status="busy",
|
||||||
message="Cannot delete document while pipeline is busy",
|
message="Cannot delete document while pipeline is busy",
|
||||||
doc_id=doc_id,
|
doc_id=doc_id,
|
||||||
)
|
)
|
||||||
pipeline_status.update(
|
|
||||||
{
|
|
||||||
"busy": True,
|
|
||||||
"job_name": f"Deleting Document: {doc_id}",
|
|
||||||
"job_start": datetime.now().isoformat(),
|
|
||||||
"latest_message": "Starting document deletion process",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
# Use slice assignment to clear the list in place
|
|
||||||
pipeline_status["history_messages"][:] = [
|
|
||||||
f"Starting deletion for doc_id: {doc_id}"
|
|
||||||
]
|
|
||||||
|
|
||||||
try:
|
# Add deletion task to background tasks
|
||||||
result = await rag.adelete_by_doc_id(doc_id)
|
background_tasks.add_task(background_delete_document, rag, doc_id)
|
||||||
if "history_messages" in pipeline_status:
|
|
||||||
pipeline_status["history_messages"].append(result.message)
|
|
||||||
|
|
||||||
if result.status == "not_found":
|
|
||||||
raise HTTPException(status_code=404, detail=result.message)
|
|
||||||
if result.status == "fail":
|
|
||||||
raise HTTPException(status_code=500, detail=result.message)
|
|
||||||
|
|
||||||
return DeleteDocByIdResponse(
|
return DeleteDocByIdResponse(
|
||||||
doc_id=result.doc_id,
|
status="deletion_started",
|
||||||
message=result.message,
|
message=f"Document deletion for '{doc_id}' has been initiated. Processing will continue in background.",
|
||||||
status=result.status,
|
doc_id=doc_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Error deleting document {doc_id}: {str(e)}"
|
error_msg = f"Error initiating document deletion for {delete_request.doc_id}: {str(e)}"
|
||||||
logger.error(error_msg)
|
logger.error(error_msg)
|
||||||
logger.error(traceback.format_exc())
|
logger.error(traceback.format_exc())
|
||||||
if "history_messages" in pipeline_status:
|
|
||||||
pipeline_status["history_messages"].append(error_msg)
|
|
||||||
# Re-raise as HTTPException for consistent error handling by FastAPI
|
|
||||||
raise HTTPException(status_code=500, detail=error_msg)
|
raise HTTPException(status_code=500, detail=error_msg)
|
||||||
finally:
|
|
||||||
async with pipeline_status_lock:
|
|
||||||
pipeline_status["busy"] = False
|
|
||||||
completion_msg = f"Document deletion process for {doc_id} completed."
|
|
||||||
pipeline_status["latest_message"] = completion_msg
|
|
||||||
if "history_messages" in pipeline_status:
|
|
||||||
pipeline_status["history_messages"].append(completion_msg)
|
|
||||||
|
|
||||||
@router.post(
|
@router.post(
|
||||||
"/clear_cache",
|
"/clear_cache",
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue