From 08b2b6371c59d2ab544b6489d504315e508ce42a Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Thu, 4 Sep 2025 15:02:25 -0400 Subject: [PATCH] updated the default file loading task support auth and non auth modes such that owner is None so these files will be considered as default files --- .DS_Store | Bin 0 -> 8196 bytes src/main.py | 54 ++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 14 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e98b18b1f3987fb5a012ac82167e178fa2778dad GIT binary patch literal 8196 zcmeHMU2GIp6h7Zl=uF4L6k4`GSXPPwwLn{HDZg%iQ28ygEiEljb~^(dna-4**)GtK z^g%IV;)6yL{E3M~qdxFpqGEVZqb9zfi4qfD{E06{eDEjuWYlx#&Jwzm7bS{uZZh}G zJ@jk_tDm>1V|Ef#8Kumt0YM#v#QS1 z6oDuLQ3Rq0L=lK0a4STB&ur0@8=U)cHtM4YL=pI3MnHTYl6(o73gv{9!KZ_}pafu9 z#_FK)J=XyNCK<|9C?}*0puDE69uPtimKcz5o=*yMmZ?xqNGah22`7Y4Mp&UBcsk>f zfjJ>1Wzx?O{Z?GeX4C7nU|L7>VK z=N_NhGus>MkeW{!KL^tBt%H`CQ`M?6wcE*zm;ovtpEqAo^t!%$);f~6{9|&hcw&*N z=x^>dcnjNILm-XNK2fJA147@CGr5;)ExvRa12WwnI}~oGez~gZUiV>ss}ia;HYuuZ z^w^oKX@{zo)vVguJ0@ciD%Y)7)GE~X4Y?yDyl+pcBsxUv$thDziZTVuI?(e zNm2T};Y`l#Dln~zh|P);yeifU$R@+`-Qr|oyQ1t7i?taBuTwQkpV<|EFwXD5a>gts zF~20qWkvr?QL0Orgdm72pHK^E3ANBh+C_(Gl*Z`^I!!Op8G3`x(+6~cKBq6~EBczg zrSIq``k8*AEA%_cp+JR(THKDssKZLMVhz?}J=(AZTd@s0kj7r@!+spVLAV&j5qKEK z!*~QI@fe=Ob9f#v;67)kO=0v@wLy?14ZLOx4abn5g2JK!=Cy~=~+@9!6 z=ps%23Sr-(cj+T0>?J1bW%`k>pbV9mgN01byRjTA(1<3i!v<_bDxm2$Y)2P%Vh;wG zs{1jBVLXJx7=eQVj^TJf*OPdZ>3RxJ;b}aB(|8tV@G@S(t9T8s<2>Fgq2-pzw8Z;U z({e1Gb8Oc+#Iu6X)9E1Dd}I~G56%DXLA9!BO?~p7)|S6{P+j9uiLa9bbW=yuSw2RA5q}oRE?=-2U+&0!$-1|0z2EZ&IQ9pFRKo1j7NIegFUf literal 0 HcmV?d00001 diff --git a/src/main.py b/src/main.py index 08865ac9..b1190a08 100644 --- a/src/main.py +++ b/src/main.py @@ -204,7 +204,7 @@ async def ingest_default_documents_when_ready(services): base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents")) if not os.path.isdir(base_dir): - print(f"[INGEST] Documents directory not found at {base_dir}; skipping") + logger.info("Default documents directory not found; skipping ingestion", base_dir=base_dir) return # Collect files recursively @@ -215,27 +215,53 @@ async def ingest_default_documents_when_ready(services): ] if not file_paths: - print(f"[INGEST] No files found in {base_dir}; nothing to ingest") + logger.info("No default documents found; nothing to ingest", base_dir=base_dir) return - # Use anonymous context to mirror non-auth upload - user_id = "anonymous" - jwt_token = None - owner_name = "Anonymous User" - owner_email = "anonymous@localhost" + # To make default docs visible to all users, omit the 'owner' field when indexing. + # We do this by using a custom processor with owner_user_id=None (DLS rule: documents + # without an 'owner' are readable by everyone), while still authenticating to OpenSearch. + if is_no_auth_mode(): + user_id_for_tasks = "anonymous" + jwt_token = None # SessionManager will inject anonymous JWT for OS client + owner_name = "Anonymous User" + owner_email = "anonymous@localhost" + else: + from session_manager import User - task_id = await services["task_service"].create_upload_task( - user_id, - file_paths, - jwt_token=jwt_token, + system_user = User( + user_id="system_ingest", + email="system@localhost", + name="System Ingest", + picture=None, + provider="system", + ) + jwt_token = services["session_manager"].create_jwt_token(system_user) + user_id_for_tasks = system_user.user_id # For task store only + owner_name = system_user.name + owner_email = system_user.email + + # Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None) + from models.processors import DocumentFileProcessor + + processor = DocumentFileProcessor( + services["document_service"], + owner_user_id=None, # omit 'owner' field -> globally readable per DLS + jwt_token=jwt_token, # still authenticate to OpenSearch owner_name=owner_name, owner_email=owner_email, ) - print( - f"[INGEST] Started default documents ingestion task {task_id} for {len(file_paths)} file(s)" + + task_id = await services["task_service"].create_custom_task( + user_id_for_tasks, file_paths, processor + ) + logger.info( + "Started default documents ingestion task", + task_id=task_id, + file_count=len(file_paths), ) except Exception as e: - print(f"[INGEST] Default documents ingestion failed: {e}") + logger.error("Default documents ingestion failed", error=str(e)) async def initialize_services():