Merge pull request #2211 from HKUDS/add-preprocessed-status

Add PREPROCESSED (multimodal_processed) status for multimodal document processing
This commit is contained in:
Daniel.y 2025-10-14 14:08:47 +08:00 committed by GitHub
commit 92a66565af
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 114 additions and 21 deletions

View file

@ -1 +1 @@
__api_version__ = "0239" __api_version__ = "0240"

View file

@ -406,7 +406,7 @@ class DocStatusResponse(BaseModel):
"id": "doc_123456", "id": "doc_123456",
"content_summary": "Research paper on machine learning", "content_summary": "Research paper on machine learning",
"content_length": 15240, "content_length": 15240,
"status": "PROCESSED", "status": "processed",
"created_at": "2025-03-31T12:34:56", "created_at": "2025-03-31T12:34:56",
"updated_at": "2025-03-31T12:35:30", "updated_at": "2025-03-31T12:35:30",
"track_id": "upload_20250729_170612_abc123", "track_id": "upload_20250729_170612_abc123",
@ -439,7 +439,7 @@ class DocsStatusesResponse(BaseModel):
"id": "doc_123", "id": "doc_123",
"content_summary": "Pending document", "content_summary": "Pending document",
"content_length": 5000, "content_length": 5000,
"status": "PENDING", "status": "pending",
"created_at": "2025-03-31T10:00:00", "created_at": "2025-03-31T10:00:00",
"updated_at": "2025-03-31T10:00:00", "updated_at": "2025-03-31T10:00:00",
"track_id": "upload_20250331_100000_abc123", "track_id": "upload_20250331_100000_abc123",
@ -449,12 +449,27 @@ class DocsStatusesResponse(BaseModel):
"file_path": "pending_doc.pdf", "file_path": "pending_doc.pdf",
} }
], ],
"PREPROCESSED": [
{
"id": "doc_789",
"content_summary": "Document pending final indexing",
"content_length": 7200,
"status": "multimodal_processed",
"created_at": "2025-03-31T09:30:00",
"updated_at": "2025-03-31T09:35:00",
"track_id": "upload_20250331_093000_xyz789",
"chunks_count": 10,
"error": None,
"metadata": None,
"file_path": "preprocessed_doc.pdf",
}
],
"PROCESSED": [ "PROCESSED": [
{ {
"id": "doc_456", "id": "doc_456",
"content_summary": "Processed document", "content_summary": "Processed document",
"content_length": 8000, "content_length": 8000,
"status": "PROCESSED", "status": "processed",
"created_at": "2025-03-31T09:00:00", "created_at": "2025-03-31T09:00:00",
"updated_at": "2025-03-31T09:05:00", "updated_at": "2025-03-31T09:05:00",
"track_id": "insert_20250331_090000_def456", "track_id": "insert_20250331_090000_def456",
@ -626,6 +641,7 @@ class PaginatedDocsResponse(BaseModel):
"status_counts": { "status_counts": {
"PENDING": 10, "PENDING": 10,
"PROCESSING": 5, "PROCESSING": 5,
"PREPROCESSED": 5,
"PROCESSED": 130, "PROCESSED": 130,
"FAILED": 5, "FAILED": 5,
}, },
@ -648,6 +664,7 @@ class StatusCountsResponse(BaseModel):
"status_counts": { "status_counts": {
"PENDING": 10, "PENDING": 10,
"PROCESSING": 5, "PROCESSING": 5,
"PREPROCESSED": 5,
"PROCESSED": 130, "PROCESSED": 130,
"FAILED": 5, "FAILED": 5,
} }
@ -2210,7 +2227,7 @@ def create_document_routes(
To prevent excessive resource consumption, a maximum of 1,000 records is returned. To prevent excessive resource consumption, a maximum of 1,000 records is returned.
This endpoint retrieves the current status of all documents, grouped by their This endpoint retrieves the current status of all documents, grouped by their
processing status (PENDING, PROCESSING, PROCESSED, FAILED). The results are processing status (PENDING, PROCESSING, PREPROCESSED, PROCESSED, FAILED). The results are
limited to 1000 total documents with fair distribution across all statuses. limited to 1000 total documents with fair distribution across all statuses.
Returns: Returns:
@ -2226,6 +2243,7 @@ def create_document_routes(
statuses = ( statuses = (
DocStatus.PENDING, DocStatus.PENDING,
DocStatus.PROCESSING, DocStatus.PROCESSING,
DocStatus.PREPROCESSED,
DocStatus.PROCESSED, DocStatus.PROCESSED,
DocStatus.FAILED, DocStatus.FAILED,
) )

View file

@ -712,6 +712,7 @@ class DocStatus(str, Enum):
PENDING = "pending" PENDING = "pending"
PROCESSING = "processing" PROCESSING = "processing"
PREPROCESSED = "multimodal_processed"
PROCESSED = "processed" PROCESSED = "processed"
FAILED = "failed" FAILED = "failed"

View file

@ -2617,7 +2617,12 @@ class LightRAG:
) )
# Check document status and log warning for non-completed documents # Check document status and log warning for non-completed documents
doc_status = doc_status_data.get("status") raw_status = doc_status_data.get("status")
try:
doc_status = DocStatus(raw_status)
except ValueError:
doc_status = raw_status
if doc_status != DocStatus.PROCESSED: if doc_status != DocStatus.PROCESSED:
if doc_status == DocStatus.PENDING: if doc_status == DocStatus.PENDING:
warning_msg = ( warning_msg = (
@ -2627,12 +2632,23 @@ class LightRAG:
warning_msg = ( warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: PROCESSING)" f"Deleting {doc_id} {file_path}(previous status: PROCESSING)"
) )
elif doc_status == DocStatus.PREPROCESSED:
warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: PREPROCESSED)"
)
elif doc_status == DocStatus.FAILED: elif doc_status == DocStatus.FAILED:
warning_msg = ( warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: FAILED)" f"Deleting {doc_id} {file_path}(previous status: FAILED)"
) )
else: else:
warning_msg = f"Deleting {doc_id} {file_path}(previous status: {doc_status.value})" status_text = (
doc_status.value
if isinstance(doc_status, DocStatus)
else str(doc_status)
)
warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: {status_text})"
)
logger.info(warning_msg) logger.info(warning_msg)
# Update pipeline status for monitoring # Update pipeline status for monitoring
async with pipeline_status_lock: async with pipeline_status_lock:

View file

@ -167,7 +167,7 @@ export type DeleteDocResponse = {
doc_id: string doc_id: string
} }
export type DocStatus = 'pending' | 'processing' | 'processed' | 'failed' export type DocStatus = 'pending' | 'processing' | 'multimodal_processed' | 'processed' | 'failed'
export type DocStatusResponse = { export type DocStatusResponse = {
id: string id: string

View file

@ -38,6 +38,21 @@ import PipelineStatusDialog from '@/components/documents/PipelineStatusDialog'
type StatusFilter = DocStatus | 'all'; type StatusFilter = DocStatus | 'all';
// Utility functions defined outside component for better performance and to avoid dependency issues
/**
 * Look up a count that may be stored under several alternative key spellings.
 *
 * The keys are tried in the order given. The first key whose entry is a
 * number (0 included) decides the result; later keys are not consulted.
 *
 * @param counts - Map from status key to document count.
 * @param keys - Candidate keys, highest priority first.
 * @returns The first numeric entry found, or 0 when no key holds a number.
 */
const getCountValue = (counts: Record<string, number>, ...keys: string[]): number => {
  const matchedKey = keys.find((candidate) => typeof counts[candidate] === 'number')
  return matchedKey === undefined ? 0 : counts[matchedKey]
}
/**
 * Report whether any document is in a not-yet-finished status.
 *
 * A status counts as active when its count is above zero. Each status is
 * looked up under every key spelling listed for it, in the order
 * processing, pending, preprocessed.
 *
 * @param counts - Map from status key to document count.
 * @returns true if the processing, pending, or preprocessed count is > 0.
 */
const hasActiveDocumentsStatus = (counts: Record<string, number>): boolean => {
  // One entry per active status; each entry lists the accepted key aliases.
  const activeKeyGroups: string[][] = [
    ['PROCESSING', 'processing'],
    ['PENDING', 'pending'],
    ['PREPROCESSED', 'preprocessed', 'multimodal_processed']
  ]
  return activeKeyGroups.some((aliases) => getCountValue(counts, ...aliases) > 0)
}
const getDisplayFileName = (doc: DocStatusResponse, maxLength: number = 20): string => { const getDisplayFileName = (doc: DocStatusResponse, maxLength: number = 20): string => {
// Check if file_path exists and is a non-empty string // Check if file_path exists and is a non-empty string
@ -242,6 +257,7 @@ export default function DocumentManager() {
const [pageByStatus, setPageByStatus] = useState<Record<StatusFilter, number>>({ const [pageByStatus, setPageByStatus] = useState<Record<StatusFilter, number>>({
all: 1, all: 1,
processed: 1, processed: 1,
multimodal_processed: 1,
processing: 1, processing: 1,
pending: 1, pending: 1,
failed: 1, failed: 1,
@ -308,6 +324,7 @@ export default function DocumentManager() {
setPageByStatus({ setPageByStatus({
all: 1, all: 1,
processed: 1, processed: 1,
'multimodal_processed': 1,
processing: 1, processing: 1,
pending: 1, pending: 1,
failed: 1, failed: 1,
@ -452,9 +469,19 @@ export default function DocumentManager() {
return counts; return counts;
}, [docs]); }, [docs]);
const processedCount = getCountValue(statusCounts, 'PROCESSED', 'processed') || documentCounts.processed || 0;
const preprocessedCount =
getCountValue(statusCounts, 'PREPROCESSED', 'preprocessed', 'multimodal_processed') ||
documentCounts.multimodal_processed ||
0;
const processingCount = getCountValue(statusCounts, 'PROCESSING', 'processing') || documentCounts.processing || 0;
const pendingCount = getCountValue(statusCounts, 'PENDING', 'pending') || documentCounts.pending || 0;
const failedCount = getCountValue(statusCounts, 'FAILED', 'failed') || documentCounts.failed || 0;
// Store previous status counts // Store previous status counts
const prevStatusCounts = useRef({ const prevStatusCounts = useRef({
processed: 0, processed: 0,
multimodal_processed: 0,
processing: 0, processing: 0,
pending: 0, pending: 0,
failed: 0 failed: 0
@ -545,6 +572,7 @@ export default function DocumentManager() {
const legacyDocs: DocsStatusesResponse = { const legacyDocs: DocsStatusesResponse = {
statuses: { statuses: {
processed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processed'), processed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processed'),
multimodal_processed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'multimodal_processed'),
processing: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processing'), processing: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processing'),
pending: response.documents.filter((doc: DocStatusResponse) => doc.status === 'pending'), pending: response.documents.filter((doc: DocStatusResponse) => doc.status === 'pending'),
failed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'failed') failed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'failed')
@ -827,7 +855,7 @@ export default function DocumentManager() {
setTimeout(() => { setTimeout(() => {
if (isMountedRef.current && currentTab === 'documents' && health) { if (isMountedRef.current && currentTab === 'documents' && health) {
// Restore intelligent polling interval based on document status // Restore intelligent polling interval based on document status
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0; const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const normalInterval = hasActiveDocuments ? 5000 : 30000; const normalInterval = hasActiveDocuments ? 5000 : 30000;
startPollingInterval(normalInterval); startPollingInterval(normalInterval);
} }
@ -863,7 +891,7 @@ export default function DocumentManager() {
setTimeout(() => { setTimeout(() => {
if (isMountedRef.current && currentTab === 'documents' && health) { if (isMountedRef.current && currentTab === 'documents' && health) {
// Restore intelligent polling interval based on document status // Restore intelligent polling interval based on document status
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0; const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const normalInterval = hasActiveDocuments ? 5000 : 30000; const normalInterval = hasActiveDocuments ? 5000 : 30000;
startPollingInterval(normalInterval); startPollingInterval(normalInterval);
} }
@ -887,6 +915,7 @@ export default function DocumentManager() {
setPageByStatus({ setPageByStatus({
all: 1, all: 1,
processed: 1, processed: 1,
multimodal_processed: 1,
processing: 1, processing: 1,
pending: 1, pending: 1,
failed: 1, failed: 1,
@ -927,6 +956,7 @@ export default function DocumentManager() {
const legacyDocs: DocsStatusesResponse = { const legacyDocs: DocsStatusesResponse = {
statuses: { statuses: {
processed: response.documents.filter(doc => doc.status === 'processed'), processed: response.documents.filter(doc => doc.status === 'processed'),
multimodal_processed: response.documents.filter(doc => doc.status === 'multimodal_processed'),
processing: response.documents.filter(doc => doc.status === 'processing'), processing: response.documents.filter(doc => doc.status === 'processing'),
pending: response.documents.filter(doc => doc.status === 'pending'), pending: response.documents.filter(doc => doc.status === 'pending'),
failed: response.documents.filter(doc => doc.status === 'failed') failed: response.documents.filter(doc => doc.status === 'failed')
@ -961,14 +991,21 @@ export default function DocumentManager() {
handleIntelligentRefresh(); handleIntelligentRefresh();
// Reset polling timer after intelligent refresh // Reset polling timer after intelligent refresh
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0; const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const pollingInterval = hasActiveDocuments ? 5000 : 30000; const pollingInterval = hasActiveDocuments ? 5000 : 30000;
startPollingInterval(pollingInterval); startPollingInterval(pollingInterval);
} }
} }
// Update the previous state // Update the previous state
prevPipelineBusyRef.current = pipelineBusy; prevPipelineBusyRef.current = pipelineBusy;
}, [pipelineBusy, currentTab, health, handleIntelligentRefresh, statusCounts.processing, statusCounts.pending, startPollingInterval]); }, [
pipelineBusy,
currentTab,
health,
handleIntelligentRefresh,
statusCounts,
startPollingInterval
]);
// Set up intelligent polling with dynamic interval based on document status // Set up intelligent polling with dynamic interval based on document status
useEffect(() => { useEffect(() => {
@ -978,7 +1015,7 @@ export default function DocumentManager() {
} }
// Determine polling interval based on document status // Determine polling interval based on document status
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0; const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const pollingInterval = hasActiveDocuments ? 5000 : 30000; // 5s if active, 30s if idle const pollingInterval = hasActiveDocuments ? 5000 : 30000; // 5s if active, 30s if idle
startPollingInterval(pollingInterval); startPollingInterval(pollingInterval);
@ -995,6 +1032,7 @@ export default function DocumentManager() {
// Get new status counts // Get new status counts
const newStatusCounts = { const newStatusCounts = {
processed: docs?.statuses?.processed?.length || 0, processed: docs?.statuses?.processed?.length || 0,
multimodal_processed: docs?.statuses?.multimodal_processed?.length || 0,
processing: docs?.statuses?.processing?.length || 0, processing: docs?.statuses?.processing?.length || 0,
pending: docs?.statuses?.pending?.length || 0, pending: docs?.statuses?.pending?.length || 0,
failed: docs?.statuses?.failed?.length || 0 failed: docs?.statuses?.failed?.length || 0
@ -1224,11 +1262,23 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('processed')} onClick={() => handleStatusFilterChange('processed')}
disabled={isRefreshing} disabled={isRefreshing}
className={cn( className={cn(
(statusCounts.PROCESSED || statusCounts.processed || documentCounts.processed) > 0 ? 'text-green-600' : 'text-gray-500', processedCount > 0 ? 'text-green-600' : 'text-gray-500',
statusFilter === 'processed' && 'bg-green-100 dark:bg-green-900/30 font-medium border border-green-400 dark:border-green-600 shadow-sm' statusFilter === 'processed' && 'bg-green-100 dark:bg-green-900/30 font-medium border border-green-400 dark:border-green-600 shadow-sm'
)} )}
> >
{t('documentPanel.documentManager.status.completed')} ({statusCounts.PROCESSED || statusCounts.processed || 0}) {t('documentPanel.documentManager.status.completed')} ({processedCount})
</Button>
<Button
size="sm"
variant={statusFilter === 'multimodal_processed' ? 'secondary' : 'outline'}
onClick={() => handleStatusFilterChange('multimodal_processed')}
disabled={isRefreshing}
className={cn(
preprocessedCount > 0 ? 'text-purple-600' : 'text-gray-500',
statusFilter === 'multimodal_processed' && 'bg-purple-100 dark:bg-purple-900/30 font-medium border border-purple-400 dark:border-purple-600 shadow-sm'
)}
>
{t('documentPanel.documentManager.status.preprocessed')} ({preprocessedCount})
</Button> </Button>
<Button <Button
size="sm" size="sm"
@ -1236,11 +1286,11 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('processing')} onClick={() => handleStatusFilterChange('processing')}
disabled={isRefreshing} disabled={isRefreshing}
className={cn( className={cn(
(statusCounts.PROCESSING || statusCounts.processing || documentCounts.processing) > 0 ? 'text-blue-600' : 'text-gray-500', processingCount > 0 ? 'text-blue-600' : 'text-gray-500',
statusFilter === 'processing' && 'bg-blue-100 dark:bg-blue-900/30 font-medium border border-blue-400 dark:border-blue-600 shadow-sm' statusFilter === 'processing' && 'bg-blue-100 dark:bg-blue-900/30 font-medium border border-blue-400 dark:border-blue-600 shadow-sm'
)} )}
> >
{t('documentPanel.documentManager.status.processing')} ({statusCounts.PROCESSING || statusCounts.processing || 0}) {t('documentPanel.documentManager.status.processing')} ({processingCount})
</Button> </Button>
<Button <Button
size="sm" size="sm"
@ -1248,11 +1298,11 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('pending')} onClick={() => handleStatusFilterChange('pending')}
disabled={isRefreshing} disabled={isRefreshing}
className={cn( className={cn(
(statusCounts.PENDING || statusCounts.pending || documentCounts.pending) > 0 ? 'text-yellow-600' : 'text-gray-500', pendingCount > 0 ? 'text-yellow-600' : 'text-gray-500',
statusFilter === 'pending' && 'bg-yellow-100 dark:bg-yellow-900/30 font-medium border border-yellow-400 dark:border-yellow-600 shadow-sm' statusFilter === 'pending' && 'bg-yellow-100 dark:bg-yellow-900/30 font-medium border border-yellow-400 dark:border-yellow-600 shadow-sm'
)} )}
> >
{t('documentPanel.documentManager.status.pending')} ({statusCounts.PENDING || statusCounts.pending || 0}) {t('documentPanel.documentManager.status.pending')} ({pendingCount})
</Button> </Button>
<Button <Button
size="sm" size="sm"
@ -1260,11 +1310,11 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('failed')} onClick={() => handleStatusFilterChange('failed')}
disabled={isRefreshing} disabled={isRefreshing}
className={cn( className={cn(
(statusCounts.FAILED || statusCounts.failed || documentCounts.failed) > 0 ? 'text-red-600' : 'text-gray-500', failedCount > 0 ? 'text-red-600' : 'text-gray-500',
statusFilter === 'failed' && 'bg-red-100 dark:bg-red-900/30 font-medium border border-red-400 dark:border-red-600 shadow-sm' statusFilter === 'failed' && 'bg-red-100 dark:bg-red-900/30 font-medium border border-red-400 dark:border-red-600 shadow-sm'
)} )}
> >
{t('documentPanel.documentManager.status.failed')} ({statusCounts.FAILED || statusCounts.failed || 0}) {t('documentPanel.documentManager.status.failed')} ({failedCount})
</Button> </Button>
</div> </div>
<Button <Button
@ -1410,6 +1460,9 @@ export default function DocumentManager() {
{doc.status === 'processed' && ( {doc.status === 'processed' && (
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span> <span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
)} )}
{doc.status === 'multimodal_processed' && (
<span className="text-purple-600">{t('documentPanel.documentManager.status.preprocessed')}</span>
)}
{doc.status === 'processing' && ( {doc.status === 'processing' && (
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span> <span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
)} )}

View file

@ -139,6 +139,7 @@
"status": { "status": {
"all": "الكل", "all": "الكل",
"completed": "مكتمل", "completed": "مكتمل",
"preprocessed": "مُعالج مسبقًا",
"processing": "قيد المعالجة", "processing": "قيد المعالجة",
"pending": "معلق", "pending": "معلق",
"failed": "فشل" "failed": "فشل"

View file

@ -139,6 +139,7 @@
"status": { "status": {
"all": "All", "all": "All",
"completed": "Completed", "completed": "Completed",
"preprocessed": "Preprocessed",
"processing": "Processing", "processing": "Processing",
"pending": "Pending", "pending": "Pending",
"failed": "Failed" "failed": "Failed"

View file

@ -139,6 +139,7 @@
"status": { "status": {
"all": "Tous", "all": "Tous",
"completed": "Terminé", "completed": "Terminé",
"preprocessed": "Prétraité",
"processing": "En traitement", "processing": "En traitement",
"pending": "En attente", "pending": "En attente",
"failed": "Échoué" "failed": "Échoué"

View file

@ -139,6 +139,7 @@
"status": { "status": {
"all": "全部", "all": "全部",
"completed": "已完成", "completed": "已完成",
"preprocessed": "预处理",
"processing": "处理中", "processing": "处理中",
"pending": "等待中", "pending": "等待中",
"failed": "失败" "failed": "失败"

View file

@ -139,6 +139,7 @@
"status": { "status": {
"all": "全部", "all": "全部",
"completed": "已完成", "completed": "已完成",
"preprocessed": "預處理",
"processing": "處理中", "processing": "處理中",
"pending": "等待中", "pending": "等待中",
"failed": "失敗" "failed": "失敗"