from abc import ABC, abstractmethod from typing import Any, Dict from .tasks import UploadTask, FileTask class TaskProcessor(ABC): """Abstract base class for task processors""" @abstractmethod async def process_item(self, upload_task: UploadTask, item: Any, file_task: FileTask) -> None: """ Process a single item in the task. Args: upload_task: The overall upload task item: The item to process (could be file path, file info, etc.) file_task: The specific file task to update """ pass class DocumentFileProcessor(TaskProcessor): """Default processor for regular file uploads""" def __init__(self, document_service): self.document_service = document_service async def process_item(self, upload_task: UploadTask, item: str, file_task: FileTask) -> None: """Process a regular file path using DocumentService""" # This calls the existing logic await self.document_service.process_single_file_task(upload_task, item) class ConnectorFileProcessor(TaskProcessor): """Processor for connector file uploads""" def __init__(self, connector_service, connection_id: str, files_to_process: list): self.connector_service = connector_service self.connection_id = connection_id self.files_to_process = files_to_process # Create lookup map for file info - handle both file objects and file IDs self.file_info_map = {} for f in files_to_process: if isinstance(f, dict): # Full file info objects self.file_info_map[f['id']] = f else: # Just file IDs - will need to fetch metadata during processing self.file_info_map[f] = None async def process_item(self, upload_task: UploadTask, item: str, file_task: FileTask) -> None: """Process a connector file using ConnectorService""" from models.tasks import TaskStatus import time file_id = item # item is the connector file ID file_info = self.file_info_map.get(file_id) # Get the connector connector = await self.connector_service.get_connector(self.connection_id) if not connector: raise ValueError(f"Connection '{self.connection_id}' not found") # Get file content from connector (the connector will fetch metadata if needed) document = await connector.get_file_content(file_id) # Get user_id from task store lookup user_id = None for uid, tasks in self.connector_service.task_service.task_store.items(): if upload_task.task_id in tasks: user_id = uid break if not user_id: raise ValueError("Could not determine user_id for task") # Process using existing pipeline result = await self.connector_service.process_connector_document(document, user_id) file_task.status = TaskStatus.COMPLETED file_task.result = result upload_task.successful_files += 1