Merge branch 'ingest-settings' of github.com:phact/gendb into ingest-settings

This commit is contained in:
phact 2025-09-09 14:12:11 -04:00
commit f85dce6b3f
2 changed files with 35 additions and 49 deletions

View file

@@ -1,8 +1,7 @@
# Configure structured logging early # Configure structured logging early
from services.flows_service import FlowsService
from connectors.langflow_connector_service import LangflowConnectorService from connectors.langflow_connector_service import LangflowConnectorService
from connectors.service import ConnectorService from connectors.service import ConnectorService
from services.flows_service import FlowsService
from utils.logging_config import configure_from_env, get_logger from utils.logging_config import configure_from_env, get_logger
configure_from_env() configure_from_env()
@@ -23,24 +22,28 @@ from starlette.routing import Route
multiprocessing.set_start_method("spawn", force=True) multiprocessing.set_start_method("spawn", force=True)
# Create process pool FIRST, before any torch/CUDA imports # Create process pool FIRST, before any torch/CUDA imports
from utils.process_pool import process_pool from utils.process_pool import process_pool # isort: skip
import torch import torch
# API endpoints # API endpoints
from api import ( from api import (
router,
auth, auth,
chat, chat,
connectors, connectors,
flows,
knowledge_filter, knowledge_filter,
langflow_files, langflow_files,
nudges,
oidc, oidc,
router,
search, search,
settings, settings,
tasks, tasks,
upload, upload,
) )
# Existing services
from api.connector_router import ConnectorRouter
from auth_middleware import optional_auth, require_auth from auth_middleware import optional_auth, require_auth
# Configuration and setup # Configuration and setup
@@ -53,9 +56,6 @@ from config.settings import (
clients, clients,
is_no_auth_mode, is_no_auth_mode,
) )
# Existing services
from api.connector_router import ConnectorRouter
from services.auth_service import AuthService from services.auth_service import AuthService
from services.chat_service import ChatService from services.chat_service import ChatService
@@ -70,24 +70,6 @@ from services.monitor_service import MonitorService
from services.search_service import SearchService from services.search_service import SearchService
from services.task_service import TaskService from services.task_service import TaskService
from session_manager import SessionManager from session_manager import SessionManager
from utils.process_pool import process_pool
# API endpoints
from api import (
flows,
router,
nudges,
upload,
search,
chat,
auth,
connectors,
tasks,
oidc,
knowledge_filter,
settings,
)
logger.info( logger.info(
"CUDA device information", "CUDA device information",
@@ -246,7 +228,10 @@ async def init_index_when_ready():
async def ingest_default_documents_when_ready(services): async def ingest_default_documents_when_ready(services):
"""Scan the local documents folder and ingest files like a non-auth upload.""" """Scan the local documents folder and ingest files like a non-auth upload."""
try: try:
logger.info("Ingesting default documents when ready", disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW) logger.info(
"Ingesting default documents when ready",
disable_langflow_ingest=DISABLE_INGEST_WITH_LANGFLOW,
)
base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents")) base_dir = os.path.abspath(os.path.join(os.getcwd(), "documents"))
if not os.path.isdir(base_dir): if not os.path.isdir(base_dir):
logger.info( logger.info(
@@ -295,7 +280,7 @@ async def _ingest_default_documents_langflow(services, file_paths):
logger.debug("Processing file with Langflow pipeline", file_path=file_path) logger.debug("Processing file with Langflow pipeline", file_path=file_path)
# Read file content # Read file content
with open(file_path, 'rb') as f: with open(file_path, "rb") as f:
content = f.read() content = f.read()
# Create file tuple for upload # Create file tuple for upload
@@ -303,12 +288,13 @@ async def _ingest_default_documents_langflow(services, file_paths):
# Determine content type based on file extension # Determine content type based on file extension
content_type, _ = mimetypes.guess_type(filename) content_type, _ = mimetypes.guess_type(filename)
if not content_type: if not content_type:
content_type = 'application/octet-stream' content_type = "application/octet-stream"
file_tuple = (filename, content, content_type) file_tuple = (filename, content, content_type)
# Use AnonymousUser details for default documents # Use AnonymousUser details for default documents
from session_manager import AnonymousUser from session_manager import AnonymousUser
anonymous_user = AnonymousUser() anonymous_user = AnonymousUser()
# Get JWT token using same logic as DocumentFileProcessor # Get JWT token using same logic as DocumentFileProcessor
@@ -322,7 +308,7 @@ async def _ingest_default_documents_langflow(services, file_paths):
anonymous_user.user_id, effective_jwt anonymous_user.user_id, effective_jwt
) )
# Get the JWT that was created by session manager # Get the JWT that was created by session manager
if hasattr(session_manager, '_anonymous_jwt'): if hasattr(session_manager, "_anonymous_jwt"):
effective_jwt = session_manager._anonymous_jwt effective_jwt = session_manager._anonymous_jwt
# Prepare tweaks for default documents with anonymous user metadata # Prepare tweaks for default documents with anonymous user metadata
@@ -332,7 +318,7 @@ async def _ingest_default_documents_langflow(services, file_paths):
{"key": "owner", "value": None}, {"key": "owner", "value": None},
{"key": "owner_name", "value": anonymous_user.name}, {"key": "owner_name", "value": anonymous_user.name},
{"key": "owner_email", "value": anonymous_user.email}, {"key": "owner_email", "value": anonymous_user.email},
{"key": "connector_type", "value": "system_default"} {"key": "connector_type", "value": "system_default"},
] ]
} }
} }
@@ -447,7 +433,7 @@ async def initialize_services():
# Create connector router that chooses based on configuration # Create connector router that chooses based on configuration
connector_service = ConnectorRouter( connector_service = ConnectorRouter(
langflow_connector_service=langflow_connector_service, langflow_connector_service=langflow_connector_service,
openrag_connector_service=openrag_connector_service openrag_connector_service=openrag_connector_service,
) )
# Initialize auth service # Initialize auth service

2
uv.lock generated
View file

@@ -1405,7 +1405,7 @@ wheels = [
[[package]] [[package]]
name = "openrag" name = "openrag"
version = "0.1.1" version = "0.1.2"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "agentd" }, { name = "agentd" },