Updated the default file loading task
Support both auth and no-auth modes: default documents are ingested with owner set to None, so these files are treated as default files.
This commit is contained in:
parent
e4e3535fe2
commit
08b2b6371c
2 changed files with 40 additions and 14 deletions
BIN
.DS_Store
vendored
Normal file
BIN
.DS_Store
vendored
Normal file
Binary file not shown.
54
src/main.py
54
src/main.py
|
|
@ -204,7 +204,7 @@ async def ingest_default_documents_when_ready(services):
|
|||
|
||||
base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents"))
|
||||
if not os.path.isdir(base_dir):
|
||||
print(f"[INGEST] Documents directory not found at {base_dir}; skipping")
|
||||
logger.info("Default documents directory not found; skipping ingestion", base_dir=base_dir)
|
||||
return
|
||||
|
||||
# Collect files recursively
|
||||
|
|
@ -215,27 +215,53 @@ async def ingest_default_documents_when_ready(services):
|
|||
]
|
||||
|
||||
if not file_paths:
|
||||
print(f"[INGEST] No files found in {base_dir}; nothing to ingest")
|
||||
logger.info("No default documents found; nothing to ingest", base_dir=base_dir)
|
||||
return
|
||||
|
||||
# Use anonymous context to mirror non-auth upload
|
||||
user_id = "anonymous"
|
||||
jwt_token = None
|
||||
owner_name = "Anonymous User"
|
||||
owner_email = "anonymous@localhost"
|
||||
# To make default docs visible to all users, omit the 'owner' field when indexing.
|
||||
# We do this by using a custom processor with owner_user_id=None (DLS rule: documents
|
||||
# without an 'owner' are readable by everyone), while still authenticating to OpenSearch.
|
||||
if is_no_auth_mode():
|
||||
user_id_for_tasks = "anonymous"
|
||||
jwt_token = None # SessionManager will inject anonymous JWT for OS client
|
||||
owner_name = "Anonymous User"
|
||||
owner_email = "anonymous@localhost"
|
||||
else:
|
||||
from session_manager import User
|
||||
|
||||
task_id = await services["task_service"].create_upload_task(
|
||||
user_id,
|
||||
file_paths,
|
||||
jwt_token=jwt_token,
|
||||
system_user = User(
|
||||
user_id="system_ingest",
|
||||
email="system@localhost",
|
||||
name="System Ingest",
|
||||
picture=None,
|
||||
provider="system",
|
||||
)
|
||||
jwt_token = services["session_manager"].create_jwt_token(system_user)
|
||||
user_id_for_tasks = system_user.user_id # For task store only
|
||||
owner_name = system_user.name
|
||||
owner_email = system_user.email
|
||||
|
||||
# Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None)
|
||||
from models.processors import DocumentFileProcessor
|
||||
|
||||
processor = DocumentFileProcessor(
|
||||
services["document_service"],
|
||||
owner_user_id=None, # omit 'owner' field -> globally readable per DLS
|
||||
jwt_token=jwt_token, # still authenticate to OpenSearch
|
||||
owner_name=owner_name,
|
||||
owner_email=owner_email,
|
||||
)
|
||||
print(
|
||||
f"[INGEST] Started default documents ingestion task {task_id} for {len(file_paths)} file(s)"
|
||||
|
||||
task_id = await services["task_service"].create_custom_task(
|
||||
user_id_for_tasks, file_paths, processor
|
||||
)
|
||||
logger.info(
|
||||
"Started default documents ingestion task",
|
||||
task_id=task_id,
|
||||
file_count=len(file_paths),
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[INGEST] Default documents ingestion failed: {e}")
|
||||
logger.error("Default documents ingestion failed", error=str(e))
|
||||
|
||||
|
||||
async def initialize_services():
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue