Update default document ingestion to run as the anonymous user

This commit is contained in:
Edwin Jose 2025-09-04 16:57:43 -04:00
parent 34739193eb
commit 651dc22635
2 changed files with 12 additions and 33 deletions

View file

@ -216,42 +216,19 @@ async def ingest_default_documents_when_ready(services):
logger.info("No default documents found; nothing to ingest", base_dir=base_dir)
return
# To make default docs visible to all users, omit the 'owner' field when indexing.
# We do this by using a custom processor with owner_user_id=None (DLS rule: documents
# without an 'owner' are readable by everyone), while still authenticating to OpenSearch.
if is_no_auth_mode():
user_id_for_tasks = "anonymous"
jwt_token = None # SessionManager will inject anonymous JWT for OS client
owner_name = "Anonymous User"
owner_email = "anonymous@localhost"
else:
from session_manager import User
system_user = User(
user_id="system_ingest",
email="system@localhost",
name="System Ingest",
picture=None,
provider="system",
)
jwt_token = services["session_manager"].create_jwt_token(system_user)
user_id_for_tasks = system_user.user_id # For task store only
owner_name = system_user.name
owner_email = system_user.email
# Build a processor that DOES NOT set 'owner' on documents (owner_user_id=None)
from models.processors import DocumentFileProcessor
processor = DocumentFileProcessor(
services["document_service"],
owner_user_id=None, # omit 'owner' field -> globally readable per DLS
jwt_token=jwt_token, # still authenticate to OpenSearch
owner_name=owner_name,
owner_email=owner_email,
owner_user_id=None,
jwt_token=None,
owner_name=None,
owner_email=None,
)
task_id = await services["task_service"].create_custom_task(
user_id_for_tasks, file_paths, processor
"anonymous", file_paths, processor
)
logger.info(
"Started default documents ingestion task",

View file

@ -7,7 +7,9 @@ from dataclasses import dataclass, asdict
from cryptography.hazmat.primitives import serialization
import os
from utils.logging_config import get_logger
logger = get_logger(__name__)
@dataclass
class User:
"""User information from OAuth provider"""
@ -173,19 +175,19 @@ class SessionManager:
"""Get or create OpenSearch client for user with their JWT"""
from config.settings import is_no_auth_mode
print(
logger.info(
f"[DEBUG] get_user_opensearch_client: user_id={user_id}, jwt_token={'None' if jwt_token is None else 'present'}, no_auth_mode={is_no_auth_mode()}"
)
# When no JWT is supplied (no-auth mode, or a missing/anonymous user_id), create an anonymous JWT for OpenSearch DLS
if is_no_auth_mode() and jwt_token is None:
if jwt_token is None and (is_no_auth_mode() or user_id in (None, "anonymous","")):
if not hasattr(self, "_anonymous_jwt"):
# Create anonymous JWT token for OpenSearch OIDC
print(f"[DEBUG] Creating anonymous JWT...")
logger.info("[DEBUG] Creating anonymous JWT...")
self._anonymous_jwt = self._create_anonymous_jwt()
print(f"[DEBUG] Anonymous JWT created: {self._anonymous_jwt[:50]}...")
logger.info(f"[DEBUG] Anonymous JWT created: {self._anonymous_jwt[:50]}...")
jwt_token = self._anonymous_jwt
print(f"[DEBUG] Using anonymous JWT for OpenSearch")
logger.info(f"[DEBUG] Using anonymous JWT for OpenSearch")
# Check if we have a cached client for this user
if user_id not in self.user_opensearch_clients: