Add PREPROCESSED (multimodal_processed) status for multimodal document processing

• Add DocStatus.PREPROCESSED enum value
• Update API routes and response models
• Add preprocessed filter in web UI
• Update localization files
• Handle preprocessed status in deletion
This commit is contained in:
yangdx 2025-10-14 14:02:05 +08:00
parent 64900b5457
commit 130b4959dc
10 changed files with 113 additions and 20 deletions

View file

@ -406,7 +406,7 @@ class DocStatusResponse(BaseModel):
"id": "doc_123456",
"content_summary": "Research paper on machine learning",
"content_length": 15240,
"status": "PROCESSED",
"status": "processed",
"created_at": "2025-03-31T12:34:56",
"updated_at": "2025-03-31T12:35:30",
"track_id": "upload_20250729_170612_abc123",
@ -439,7 +439,7 @@ class DocsStatusesResponse(BaseModel):
"id": "doc_123",
"content_summary": "Pending document",
"content_length": 5000,
"status": "PENDING",
"status": "pending",
"created_at": "2025-03-31T10:00:00",
"updated_at": "2025-03-31T10:00:00",
"track_id": "upload_20250331_100000_abc123",
@ -449,12 +449,27 @@ class DocsStatusesResponse(BaseModel):
"file_path": "pending_doc.pdf",
}
],
"PREPROCESSED": [
{
"id": "doc_789",
"content_summary": "Document pending final indexing",
"content_length": 7200,
"status": "multimodal_processed",
"created_at": "2025-03-31T09:30:00",
"updated_at": "2025-03-31T09:35:00",
"track_id": "upload_20250331_093000_xyz789",
"chunks_count": 10,
"error": None,
"metadata": None,
"file_path": "preprocessed_doc.pdf",
}
],
"PROCESSED": [
{
"id": "doc_456",
"content_summary": "Processed document",
"content_length": 8000,
"status": "PROCESSED",
"status": "processed",
"created_at": "2025-03-31T09:00:00",
"updated_at": "2025-03-31T09:05:00",
"track_id": "insert_20250331_090000_def456",
@ -626,6 +641,7 @@ class PaginatedDocsResponse(BaseModel):
"status_counts": {
"PENDING": 10,
"PROCESSING": 5,
"PREPROCESSED": 5,
"PROCESSED": 130,
"FAILED": 5,
},
@ -648,6 +664,7 @@ class StatusCountsResponse(BaseModel):
"status_counts": {
"PENDING": 10,
"PROCESSING": 5,
"PREPROCESSED": 5,
"PROCESSED": 130,
"FAILED": 5,
}
@ -2210,7 +2227,7 @@ def create_document_routes(
To prevent excessive resource consumption, a maximum of 1,000 records is returned.
This endpoint retrieves the current status of all documents, grouped by their
processing status (PENDING, PROCESSING, PROCESSED, FAILED). The results are
processing status (PENDING, PROCESSING, PREPROCESSED, PROCESSED, FAILED). The results are
limited to 1000 total documents with fair distribution across all statuses.
Returns:
@ -2226,6 +2243,7 @@ def create_document_routes(
statuses = (
DocStatus.PENDING,
DocStatus.PROCESSING,
DocStatus.PREPROCESSED,
DocStatus.PROCESSED,
DocStatus.FAILED,
)

View file

@ -712,6 +712,7 @@ class DocStatus(str, Enum):
PENDING = "pending"
PROCESSING = "processing"
PREPROCESSED = "multimodal_processed"
PROCESSED = "processed"
FAILED = "failed"

View file

@ -2617,7 +2617,12 @@ class LightRAG:
)
# Check document status and log warning for non-completed documents
doc_status = doc_status_data.get("status")
raw_status = doc_status_data.get("status")
try:
doc_status = DocStatus(raw_status)
except ValueError:
doc_status = raw_status
if doc_status != DocStatus.PROCESSED:
if doc_status == DocStatus.PENDING:
warning_msg = (
@ -2627,12 +2632,23 @@ class LightRAG:
warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: PROCESSING)"
)
elif doc_status == DocStatus.PREPROCESSED:
warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: PREPROCESSED)"
)
elif doc_status == DocStatus.FAILED:
warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: FAILED)"
)
else:
warning_msg = f"Deleting {doc_id} {file_path}(previous status: {doc_status.value})"
status_text = (
doc_status.value
if isinstance(doc_status, DocStatus)
else str(doc_status)
)
warning_msg = (
f"Deleting {doc_id} {file_path}(previous status: {status_text})"
)
logger.info(warning_msg)
# Update pipeline status for monitoring
async with pipeline_status_lock:

View file

@ -167,7 +167,7 @@ export type DeleteDocResponse = {
doc_id: string
}
export type DocStatus = 'pending' | 'processing' | 'processed' | 'failed'
export type DocStatus = 'pending' | 'processing' | 'multimodal_processed' | 'processed' | 'failed'
export type DocStatusResponse = {
id: string

View file

@ -38,6 +38,21 @@ import PipelineStatusDialog from '@/components/documents/PipelineStatusDialog'
type StatusFilter = DocStatus | 'all';
// Utility functions defined outside component for better performance and to avoid dependency issues
/**
 * Looks up a count under several alternative key spellings.
 *
 * Keys are tried in the order given; the first one whose entry is of type
 * `number` wins (a stored `0` counts as a match and is returned as-is).
 *
 * @param counts - Map from status key to document count.
 * @param keys - Candidate keys, in priority order.
 * @returns The first numeric entry found, or `0` when none of the keys holds a number.
 */
const getCountValue = (counts: Record<string, number>, ...keys: string[]): number => {
  // Resolve every alias up front, then pick the first entry that is actually a number.
  const candidates = keys.map((alias) => counts[alias])
  const firstNumeric = candidates.find((candidate) => typeof candidate === 'number')
  return firstNumeric ?? 0
}
/**
 * Reports whether any document is in a status this module treats as active:
 * processing, pending, or preprocessed.
 *
 * Each status is looked up under its alternative key spellings via
 * `getCountValue`.
 *
 * @param counts - Map from status key to document count.
 * @returns `true` when at least one of the active statuses has a count above zero.
 */
const hasActiveDocumentsStatus = (counts: Record<string, number>): boolean => {
  // Alias groups are checked in this order; evaluation stops at the first positive count.
  const activeStatusAliases: string[][] = [
    ['PROCESSING', 'processing'],
    ['PENDING', 'pending'],
    ['PREPROCESSED', 'preprocessed', 'multimodal_processed']
  ]
  return activeStatusAliases.some((aliases) => getCountValue(counts, ...aliases) > 0)
}
const getDisplayFileName = (doc: DocStatusResponse, maxLength: number = 20): string => {
// Check if file_path exists and is a non-empty string
@ -242,6 +257,7 @@ export default function DocumentManager() {
const [pageByStatus, setPageByStatus] = useState<Record<StatusFilter, number>>({
all: 1,
processed: 1,
multimodal_processed: 1,
processing: 1,
pending: 1,
failed: 1,
@ -308,6 +324,7 @@ export default function DocumentManager() {
setPageByStatus({
all: 1,
processed: 1,
'multimodal_processed': 1,
processing: 1,
pending: 1,
failed: 1,
@ -452,9 +469,19 @@ export default function DocumentManager() {
return counts;
}, [docs]);
const processedCount = getCountValue(statusCounts, 'PROCESSED', 'processed') || documentCounts.processed || 0;
const preprocessedCount =
getCountValue(statusCounts, 'PREPROCESSED', 'preprocessed', 'multimodal_processed') ||
documentCounts.multimodal_processed ||
0;
const processingCount = getCountValue(statusCounts, 'PROCESSING', 'processing') || documentCounts.processing || 0;
const pendingCount = getCountValue(statusCounts, 'PENDING', 'pending') || documentCounts.pending || 0;
const failedCount = getCountValue(statusCounts, 'FAILED', 'failed') || documentCounts.failed || 0;
// Store previous status counts
const prevStatusCounts = useRef({
processed: 0,
multimodal_processed: 0,
processing: 0,
pending: 0,
failed: 0
@ -545,6 +572,7 @@ export default function DocumentManager() {
const legacyDocs: DocsStatusesResponse = {
statuses: {
processed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processed'),
multimodal_processed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'multimodal_processed'),
processing: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processing'),
pending: response.documents.filter((doc: DocStatusResponse) => doc.status === 'pending'),
failed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'failed')
@ -827,7 +855,7 @@ export default function DocumentManager() {
setTimeout(() => {
if (isMountedRef.current && currentTab === 'documents' && health) {
// Restore intelligent polling interval based on document status
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const normalInterval = hasActiveDocuments ? 5000 : 30000;
startPollingInterval(normalInterval);
}
@ -863,7 +891,7 @@ export default function DocumentManager() {
setTimeout(() => {
if (isMountedRef.current && currentTab === 'documents' && health) {
// Restore intelligent polling interval based on document status
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const normalInterval = hasActiveDocuments ? 5000 : 30000;
startPollingInterval(normalInterval);
}
@ -887,6 +915,7 @@ export default function DocumentManager() {
setPageByStatus({
all: 1,
processed: 1,
multimodal_processed: 1,
processing: 1,
pending: 1,
failed: 1,
@ -927,6 +956,7 @@ export default function DocumentManager() {
const legacyDocs: DocsStatusesResponse = {
statuses: {
processed: response.documents.filter(doc => doc.status === 'processed'),
multimodal_processed: response.documents.filter(doc => doc.status === 'multimodal_processed'),
processing: response.documents.filter(doc => doc.status === 'processing'),
pending: response.documents.filter(doc => doc.status === 'pending'),
failed: response.documents.filter(doc => doc.status === 'failed')
@ -961,14 +991,21 @@ export default function DocumentManager() {
handleIntelligentRefresh();
// Reset polling timer after intelligent refresh
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const pollingInterval = hasActiveDocuments ? 5000 : 30000;
startPollingInterval(pollingInterval);
}
}
// Update the previous state
prevPipelineBusyRef.current = pipelineBusy;
}, [pipelineBusy, currentTab, health, handleIntelligentRefresh, statusCounts.processing, statusCounts.pending, startPollingInterval]);
}, [
pipelineBusy,
currentTab,
health,
handleIntelligentRefresh,
statusCounts,
startPollingInterval
]);
// Set up intelligent polling with dynamic interval based on document status
useEffect(() => {
@ -978,7 +1015,7 @@ export default function DocumentManager() {
}
// Determine polling interval based on document status
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
const pollingInterval = hasActiveDocuments ? 5000 : 30000; // 5s if active, 30s if idle
startPollingInterval(pollingInterval);
@ -995,6 +1032,7 @@ export default function DocumentManager() {
// Get new status counts
const newStatusCounts = {
processed: docs?.statuses?.processed?.length || 0,
multimodal_processed: docs?.statuses?.multimodal_processed?.length || 0,
processing: docs?.statuses?.processing?.length || 0,
pending: docs?.statuses?.pending?.length || 0,
failed: docs?.statuses?.failed?.length || 0
@ -1224,11 +1262,23 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('processed')}
disabled={isRefreshing}
className={cn(
(statusCounts.PROCESSED || statusCounts.processed || documentCounts.processed) > 0 ? 'text-green-600' : 'text-gray-500',
processedCount > 0 ? 'text-green-600' : 'text-gray-500',
statusFilter === 'processed' && 'bg-green-100 dark:bg-green-900/30 font-medium border border-green-400 dark:border-green-600 shadow-sm'
)}
>
{t('documentPanel.documentManager.status.completed')} ({statusCounts.PROCESSED || statusCounts.processed || 0})
{t('documentPanel.documentManager.status.completed')} ({processedCount})
</Button>
<Button
size="sm"
variant={statusFilter === 'multimodal_processed' ? 'secondary' : 'outline'}
onClick={() => handleStatusFilterChange('multimodal_processed')}
disabled={isRefreshing}
className={cn(
preprocessedCount > 0 ? 'text-purple-600' : 'text-gray-500',
statusFilter === 'multimodal_processed' && 'bg-purple-100 dark:bg-purple-900/30 font-medium border border-purple-400 dark:border-purple-600 shadow-sm'
)}
>
{t('documentPanel.documentManager.status.preprocessed')} ({preprocessedCount})
</Button>
<Button
size="sm"
@ -1236,11 +1286,11 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('processing')}
disabled={isRefreshing}
className={cn(
(statusCounts.PROCESSING || statusCounts.processing || documentCounts.processing) > 0 ? 'text-blue-600' : 'text-gray-500',
processingCount > 0 ? 'text-blue-600' : 'text-gray-500',
statusFilter === 'processing' && 'bg-blue-100 dark:bg-blue-900/30 font-medium border border-blue-400 dark:border-blue-600 shadow-sm'
)}
>
{t('documentPanel.documentManager.status.processing')} ({statusCounts.PROCESSING || statusCounts.processing || 0})
{t('documentPanel.documentManager.status.processing')} ({processingCount})
</Button>
<Button
size="sm"
@ -1248,11 +1298,11 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('pending')}
disabled={isRefreshing}
className={cn(
(statusCounts.PENDING || statusCounts.pending || documentCounts.pending) > 0 ? 'text-yellow-600' : 'text-gray-500',
pendingCount > 0 ? 'text-yellow-600' : 'text-gray-500',
statusFilter === 'pending' && 'bg-yellow-100 dark:bg-yellow-900/30 font-medium border border-yellow-400 dark:border-yellow-600 shadow-sm'
)}
>
{t('documentPanel.documentManager.status.pending')} ({statusCounts.PENDING || statusCounts.pending || 0})
{t('documentPanel.documentManager.status.pending')} ({pendingCount})
</Button>
<Button
size="sm"
@ -1260,11 +1310,11 @@ export default function DocumentManager() {
onClick={() => handleStatusFilterChange('failed')}
disabled={isRefreshing}
className={cn(
(statusCounts.FAILED || statusCounts.failed || documentCounts.failed) > 0 ? 'text-red-600' : 'text-gray-500',
failedCount > 0 ? 'text-red-600' : 'text-gray-500',
statusFilter === 'failed' && 'bg-red-100 dark:bg-red-900/30 font-medium border border-red-400 dark:border-red-600 shadow-sm'
)}
>
{t('documentPanel.documentManager.status.failed')} ({statusCounts.FAILED || statusCounts.failed || 0})
{t('documentPanel.documentManager.status.failed')} ({failedCount})
</Button>
</div>
<Button
@ -1410,6 +1460,9 @@ export default function DocumentManager() {
{doc.status === 'processed' && (
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
)}
{doc.status === 'multimodal_processed' && (
<span className="text-purple-600">{t('documentPanel.documentManager.status.preprocessed')}</span>
)}
{doc.status === 'processing' && (
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
)}

View file

@ -139,6 +139,7 @@
"status": {
"all": "الكل",
"completed": "مكتمل",
"preprocessed": "مُعالج مسبقًا",
"processing": "قيد المعالجة",
"pending": "معلق",
"failed": "فشل"

View file

@ -139,6 +139,7 @@
"status": {
"all": "All",
"completed": "Completed",
"preprocessed": "Preprocessed",
"processing": "Processing",
"pending": "Pending",
"failed": "Failed"

View file

@ -139,6 +139,7 @@
"status": {
"all": "Tous",
"completed": "Terminé",
"preprocessed": "Prétraité",
"processing": "En traitement",
"pending": "En attente",
"failed": "Échoué"

View file

@ -139,6 +139,7 @@
"status": {
"all": "全部",
"completed": "已完成",
"preprocessed": "预处理",
"processing": "处理中",
"pending": "等待中",
"failed": "失败"

View file

@ -139,6 +139,7 @@
"status": {
"all": "全部",
"completed": "已完成",
"preprocessed": "預處理",
"processing": "處理中",
"pending": "等待中",
"failed": "失敗"