diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 00000000..e98b18b1 Binary files /dev/null and b/.DS_Store differ diff --git a/src/main.py b/src/main.py index 08865ac9..b1190a08 100644 --- a/src/main.py +++ b/src/main.py @@ -204,7 +204,7 @@ async def ingest_default_documents_when_ready(services): base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents")) if not os.path.isdir(base_dir): - print(f"[INGEST] Documents directory not found at {base_dir}; skipping") + logger.info("Default documents directory not found; skipping ingestion", base_dir=base_dir) return # Collect files recursively @@ -215,27 +215,53 @@ async def ingest_default_documents_when_ready(services): ] if not file_paths: - print(f"[INGEST] No files found in {base_dir}; nothing to ingest") + logger.info("No default documents found; nothing to ingest", base_dir=base_dir) return - # Use anonymous context to mirror non-auth upload - user_id = "anonymous" - jwt_token = None - owner_name = "Anonymous User" - owner_email = "anonymous@localhost" + # To make default docs visible to all users, omit the 'owner' field when indexing. + # We do this by using a custom processor with owner_user_id=None (DLS rule: documents + # without an 'owner' are readable by everyone), while still authenticating to OpenSearch. + if is_no_auth_mode(): + user_id_for_tasks = "anonymous" + jwt_token = None # SessionManager will inject anonymous JWT for OS client + owner_name = "Anonymous User" + owner_email = "anonymous@localhost" + else: + from session_manager import User - task_id = await services["task_service"].create_upload_task( - user_id, - file_paths, - jwt_token=jwt_token, + system_user = User( + user_id="system_ingest", + email="system@localhost", + name="System Ingest", + picture=None, + provider="system", + ) + jwt_token = services["session_manager"].create_jwt_token(system_user) + user_id_for_tasks = system_user.user_id # For task store only + owner_name = system_user.name + owner_email = system_user.email + + # Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None) + from models.processors import DocumentFileProcessor + + processor = DocumentFileProcessor( + services["document_service"], + owner_user_id=None, # omit 'owner' field -> globally readable per DLS + jwt_token=jwt_token, # still authenticate to OpenSearch owner_name=owner_name, owner_email=owner_email, ) - print( - f"[INGEST] Started default documents ingestion task {task_id} for {len(file_paths)} file(s)" + + task_id = await services["task_service"].create_custom_task( + user_id_for_tasks, file_paths, processor + ) + logger.info( + "Started default documents ingestion task", + task_id=task_id, + file_count=len(file_paths), ) except Exception as e: - print(f"[INGEST] Default documents ingestion failed: {e}") + logger.error("Default documents ingestion failed", error=str(e)) async def initialize_services():