diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 00000000..e98b18b1 Binary files /dev/null and b/.DS_Store differ diff --git a/documents/ai-human-resources.pdf b/documents/ai-human-resources.pdf new file mode 100644 index 00000000..5e36eab4 Binary files /dev/null and b/documents/ai-human-resources.pdf differ diff --git a/src/main.py b/src/main.py index 90a34bb8..480d233a 100644 --- a/src/main.py +++ b/src/main.py @@ -32,6 +32,7 @@ import torch # Configuration and setup from config.settings import clients, INDEX_NAME, INDEX_BODY, SESSION_SECRET +from config.settings import is_no_auth_mode from utils.gpu_detection import detect_gpu_devices # Services @@ -215,6 +216,54 @@ async def init_index_when_ready(): ) +async def ingest_default_documents_when_ready(services): + """Scan the local documents folder and ingest files like a non-auth upload.""" + try: + logger.info("Ingesting default documents when ready") + base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents")) + if not os.path.isdir(base_dir): + logger.info("Default documents directory not found; skipping ingestion", base_dir=base_dir) + return + + # Collect files recursively + file_paths = [ + os.path.join(root, fn) + for root, _, files in os.walk(base_dir) + for fn in files + ] + + if not file_paths: + logger.info("No default documents found; nothing to ingest", base_dir=base_dir) + return + + # Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None) + from models.processors import DocumentFileProcessor + + processor = DocumentFileProcessor( + services["document_service"], + owner_user_id=None, + jwt_token=None, + owner_name=None, + owner_email=None, + ) + + task_id = await services["task_service"].create_custom_task( + "anonymous", file_paths, processor + ) + logger.info( + "Started default documents ingestion task", + task_id=task_id, + file_count=len(file_paths), + ) + except Exception as e: + logger.error("Default documents ingestion failed", error=str(e)) + +async def startup_tasks(services): + """Startup tasks""" + logger.info("Starting startup tasks") + await init_index() + await ingest_default_documents_when_ready(services) + async def initialize_services(): """Initialize all services and their dependencies""" # Generate JWT keys if they don't exist @@ -655,12 +704,15 @@ async def create_app(): app = Starlette(debug=True, routes=routes) app.state.services = services # Store services for cleanup + app.state.background_tasks = set() # Add startup event handler @app.on_event("startup") async def startup_event(): # Start index initialization in background to avoid blocking OIDC endpoints - asyncio.create_task(init_index_when_ready()) + t1 = asyncio.create_task(startup_tasks(services)) + app.state.background_tasks.add(t1) + t1.add_done_callback(app.state.background_tasks.discard) # Add shutdown event handler @app.on_event("shutdown") diff --git a/src/session_manager.py b/src/session_manager.py index 494d0061..1008d1fd 100644 --- a/src/session_manager.py +++ b/src/session_manager.py @@ -10,7 +10,9 @@ from utils.logging_config import get_logger logger = get_logger(__name__) +from utils.logging_config import get_logger +logger = get_logger(__name__) @dataclass class User: """User information from OAuth provider""" @@ -199,7 +201,7 @@ class SessionManager: ) # In no-auth mode, create anonymous JWT for OpenSearch DLS - if is_no_auth_mode() and jwt_token is None: + if jwt_token is None and (is_no_auth_mode() or user_id in (None, AnonymousUser().user_id)): if not hasattr(self, "_anonymous_jwt"): # Create anonymous JWT token for OpenSearch OIDC logger.debug("Creating anonymous JWT")