Update the default file loading task

Support both auth and no-auth modes: default documents are indexed with the owner set to None (the 'owner' field is omitted), so these files are treated as default files that are readable by all users.
This commit is contained in:
Edwin Jose 2025-09-04 15:02:25 -04:00
parent e4e3535fe2
commit 08b2b6371c
2 changed files with 40 additions and 14 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

View file

@ -204,7 +204,7 @@ async def ingest_default_documents_when_ready(services):
base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents"))
if not os.path.isdir(base_dir):
print(f"[INGEST] Documents directory not found at {base_dir}; skipping")
logger.info("Default documents directory not found; skipping ingestion", base_dir=base_dir)
return
# Collect files recursively
@ -215,27 +215,53 @@ async def ingest_default_documents_when_ready(services):
]
if not file_paths:
print(f"[INGEST] No files found in {base_dir}; nothing to ingest")
logger.info("No default documents found; nothing to ingest", base_dir=base_dir)
return
# Use anonymous context to mirror non-auth upload
user_id = "anonymous"
jwt_token = None
owner_name = "Anonymous User"
owner_email = "anonymous@localhost"
# To make default docs visible to all users, omit the 'owner' field when indexing.
# We do this by using a custom processor with owner_user_id=None (DLS rule: documents
# without an 'owner' are readable by everyone), while still authenticating to OpenSearch.
if is_no_auth_mode():
user_id_for_tasks = "anonymous"
jwt_token = None # SessionManager will inject anonymous JWT for OS client
owner_name = "Anonymous User"
owner_email = "anonymous@localhost"
else:
from session_manager import User
task_id = await services["task_service"].create_upload_task(
user_id,
file_paths,
jwt_token=jwt_token,
system_user = User(
user_id="system_ingest",
email="system@localhost",
name="System Ingest",
picture=None,
provider="system",
)
jwt_token = services["session_manager"].create_jwt_token(system_user)
user_id_for_tasks = system_user.user_id # For task store only
owner_name = system_user.name
owner_email = system_user.email
# Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None)
from models.processors import DocumentFileProcessor
processor = DocumentFileProcessor(
services["document_service"],
owner_user_id=None, # omit 'owner' field -> globally readable per DLS
jwt_token=jwt_token, # still authenticate to OpenSearch
owner_name=owner_name,
owner_email=owner_email,
)
print(
f"[INGEST] Started default documents ingestion task {task_id} for {len(file_paths)} file(s)"
task_id = await services["task_service"].create_custom_task(
user_id_for_tasks, file_paths, processor
)
logger.info(
"Started default documents ingestion task",
task_id=task_id,
file_count=len(file_paths),
)
except Exception as e:
print(f"[INGEST] Default documents ingestion failed: {e}")
logger.error("Default documents ingestion failed", error=str(e))
async def initialize_services():