openrag/src/models/processors.py

from abc import ABC, abstractmethod
from typing import Any, Dict

from .tasks import UploadTask, FileTask


class TaskProcessor(ABC):
    """Abstract base class for task processors"""

    @abstractmethod
    async def process_item(self, upload_task: UploadTask, item: Any, file_task: FileTask) -> None:
        """
        Process a single item in the task.

        Args:
            upload_task: The overall upload task
            item: The item to process (could be file path, file info, etc.)
            file_task: The specific file task to update
        """
        pass
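

# Illustrative usage sketch (not part of the original module): a hypothetical task
# runner could drive any TaskProcessor implementation like this, within an async
# context. The `items` and `file_tasks` variables are assumptions for the example.
#
#     processor: TaskProcessor = DocumentFileProcessor(document_service)
#     for item, file_task in zip(items, file_tasks):
#         await processor.process_item(upload_task, item, file_task)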


class DocumentFileProcessor(TaskProcessor):
    """Default processor for regular file uploads"""

    def __init__(self, document_service):
        self.document_service = document_service

    async def process_item(self, upload_task: UploadTask, item: str, file_task: FileTask) -> None:
        """Process a regular file path using DocumentService"""
        # Delegate to the existing single-file processing logic in DocumentService
        await self.document_service.process_single_file_task(upload_task, item)


class ConnectorFileProcessor(TaskProcessor):
    """Processor for connector file uploads"""

    def __init__(self, connector_service, connection_id: str, files_to_process: list):
        self.connector_service = connector_service
        self.connection_id = connection_id
        self.files_to_process = files_to_process
        # Create lookup map for file info - handle both file objects and file IDs
        self.file_info_map = {}
        for f in files_to_process:
            if isinstance(f, dict):
                # Full file info objects, keyed by their ID
                self.file_info_map[f['id']] = f
            else:
                # Just file IDs - metadata will be fetched during processing
                self.file_info_map[f] = None

    async def process_item(self, upload_task: UploadTask, item: str, file_task: FileTask) -> None:
        """Process a connector file using ConnectorService"""
        from models.tasks import TaskStatus
        import time

        file_id = item  # item is the connector file ID
        # May be None when only a bare file ID was provided; the connector fetches metadata itself
        file_info = self.file_info_map.get(file_id)

        # Get the connector for this connection
        connector = await self.connector_service.get_connector(self.connection_id)
        if not connector:
            raise ValueError(f"Connection '{self.connection_id}' not found")

        # Get file content from the connector (the connector will fetch metadata if needed)
        document = await connector.get_file_content(file_id)

        # Determine the owning user_id by scanning the task store for this task
        user_id = None
        for uid, tasks in self.connector_service.task_service.task_store.items():
            if upload_task.task_id in tasks:
                user_id = uid
                break
        if not user_id:
            raise ValueError("Could not determine user_id for task")

        # Process through the existing pipeline and mark the file task as completed
        result = await self.connector_service.process_connector_document(document, user_id)
        file_task.status = TaskStatus.COMPLETED
        file_task.result = result
        upload_task.successful_files += 1
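

# Illustrative usage sketch (not part of the original module): `files_to_process`
# may mix full file-info dicts and bare file IDs, and those same IDs are then passed
# as items to process_item inside an async context. Names like `connector_service`,
# `connection_id`, `upload_task`, and `file_tasks` are assumptions for the example.
#
#     files = [{"id": "abc123", "name": "report.pdf"}, "def456"]
#     processor = ConnectorFileProcessor(connector_service, connection_id, files)
#     # processor.file_info_map == {"abc123": {"id": "abc123", "name": "report.pdf"}, "def456": None}
#
#     for file_id, file_task in zip(["abc123", "def456"], file_tasks):
#         await processor.process_item(upload_task, file_id, file_task)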