openrag/src/api/settings.py
Cole Goldsmith b88c8b20df
Feat/provider validation banner (#353)
* models query combined

* make endpoint to handle provider health

* provider health banner

* update-pdf-to-include-provider-selection (#344)

* polishing the error fixing experience

* fix agent instructions and up char limit

* fix provider

* disable tracing in langflow

* improve docling serve banner remove false positives

* Changed pyproject.toml docling versions

* Added another uv lock revision

* version bump

* unused things and fix bad conflicts

* add isFetching to the hook

* put back settings for models queries to never cache results

* update banner refetching indicator

* validate provider settings when saving

* fix settings page layout issue

* Added retry as false on the get models, to not take a long time

---------

Co-authored-by: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Co-authored-by: Mike Fortman <michael.fortman@datastax.com>
Co-authored-by: phact <estevezsebastian@gmail.com>
Co-authored-by: Lucas Oliveira <lucas.edu.oli@hotmail.com>
2025-11-06 13:03:50 -06:00

944 lines
39 KiB
Python

import json
import platform
import time
from starlette.responses import JSONResponse
from utils.container_utils import transform_localhost_url
from utils.logging_config import get_logger
from config.settings import (
DISABLE_INGEST_WITH_LANGFLOW,
LANGFLOW_URL,
LANGFLOW_CHAT_FLOW_ID,
LANGFLOW_INGEST_FLOW_ID,
LANGFLOW_PUBLIC_URL,
LOCALHOST_URL,
clients,
get_openrag_config,
config_manager,
)
from api.provider_validation import validate_provider_setup
logger = get_logger(__name__)
# Docling preset configurations
def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False):
    """Build the docling option set for the given toggle selection.

    Args:
        table_structure: Enable table structure parsing (default: False)
        ocr: Enable OCR for text extraction from images (default: False)
        picture_descriptions: Enable picture descriptions/captions (default: False)

    Returns:
        dict: Docling configuration ready to be pushed into the ingest flow.
    """
    # macOS ships a native OCR backend ("ocrmac"); everywhere else use easyocr.
    ocr_backend = "ocrmac" if platform.system() == "Darwin" else "easyocr"
    return {
        "do_ocr": ocr,
        "ocr_engine": ocr_backend,
        "do_table_structure": table_structure,
        # Picture classification and description are driven by the same toggle.
        "do_picture_classification": picture_descriptions,
        "do_picture_description": picture_descriptions,
        "picture_description_local": {
            "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
            "prompt": "Describe this image in a few sentences.",
        },
    }
async def get_settings(request, session_manager):
    """Get application settings.

    Builds a frontend-safe snapshot of the current OpenRAG configuration:
    Langflow URLs/flow ids, provider settings (the API key is intentionally
    omitted), knowledge and agent settings and, when the config has been
    edited, ingestion defaults read back from the live Langflow ingest flow.

    Args:
        request: Incoming HTTP request (not used by this handler).
        session_manager: Session manager dependency (not used by this handler).

    Returns:
        JSONResponse: the settings payload, or a 500 error body on failure.
    """
    try:
        openrag_config = get_openrag_config()
        provider_config = openrag_config.provider
        knowledge_config = openrag_config.knowledge
        agent_config = openrag_config.agent
        # Return public settings that are safe to expose to frontend
        settings = {
            "langflow_url": LANGFLOW_URL,
            "flow_id": LANGFLOW_CHAT_FLOW_ID,
            "ingest_flow_id": LANGFLOW_INGEST_FLOW_ID,
            "langflow_public_url": LANGFLOW_PUBLIC_URL,
            "edited": openrag_config.edited,
            # OpenRAG configuration
            "provider": {
                "model_provider": provider_config.model_provider,
                "endpoint": provider_config.endpoint if provider_config.endpoint else None,
                "project_id": provider_config.project_id if provider_config.project_id else None,
                # Note: API key is not exposed for security
            },
            "knowledge": {
                "embedding_model": knowledge_config.embedding_model,
                "chunk_size": knowledge_config.chunk_size,
                "chunk_overlap": knowledge_config.chunk_overlap,
                "table_structure": knowledge_config.table_structure,
                "ocr": knowledge_config.ocr,
                "picture_descriptions": knowledge_config.picture_descriptions,
            },
            "agent": {
                "llm_model": agent_config.llm_model,
                "system_prompt": agent_config.system_prompt,
            },
            "localhost_url": LOCALHOST_URL,
        }
        # Only expose edit URLs when a public URL is configured
        if LANGFLOW_PUBLIC_URL and LANGFLOW_CHAT_FLOW_ID:
            settings["langflow_edit_url"] = (
                f"{LANGFLOW_PUBLIC_URL.rstrip('/')}/flow/{LANGFLOW_CHAT_FLOW_ID}"
            )
        if LANGFLOW_PUBLIC_URL and LANGFLOW_INGEST_FLOW_ID:
            settings["langflow_ingest_edit_url"] = (
                f"{LANGFLOW_PUBLIC_URL.rstrip('/')}/flow/{LANGFLOW_INGEST_FLOW_ID}"
            )
        # Fetch ingestion flow configuration to get actual component defaults
        if LANGFLOW_INGEST_FLOW_ID and openrag_config.edited:
            try:
                response = await clients.langflow_request(
                    "GET", f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}"
                )
                if response.status_code == 200:
                    flow_data = response.json()
                    # Extract component defaults (ingestion-specific settings only)
                    # Start with configured defaults
                    ingestion_defaults = {
                        "chunkSize": knowledge_config.chunk_size,
                        "chunkOverlap": knowledge_config.chunk_overlap,
                        "separator": "\\n",  # Keep hardcoded for now as it's not in config
                        "embeddingModel": knowledge_config.embedding_model,
                    }
                    # Override defaults with values actually stored on the
                    # flow's components, when present.
                    if flow_data.get("data", {}).get("nodes"):
                        for node in flow_data["data"]["nodes"]:
                            node_template = (
                                node.get("data", {}).get("node", {}).get("template", {})
                            )
                            # Split Text component (SplitText-QIKhg)
                            if node.get("id") == "SplitText-QIKhg":
                                if node_template.get("chunk_size", {}).get("value"):
                                    ingestion_defaults["chunkSize"] = node_template[
                                        "chunk_size"
                                    ]["value"]
                                if node_template.get("chunk_overlap", {}).get("value"):
                                    ingestion_defaults["chunkOverlap"] = node_template[
                                        "chunk_overlap"
                                    ]["value"]
                                if node_template.get("separator", {}).get("value"):
                                    ingestion_defaults["separator"] = node_template[
                                        "separator"
                                    ]["value"]
                            # OpenAI Embeddings component (OpenAIEmbeddings-joRJ6)
                            elif node.get("id") == "OpenAIEmbeddings-joRJ6":
                                if node_template.get("model", {}).get("value"):
                                    ingestion_defaults["embeddingModel"] = (
                                        node_template["model"]["value"]
                                    )
                    # Note: OpenSearch component settings are not exposed for ingestion
                    # (search-related parameters like number_of_results, score_threshold
                    # are for retrieval, not ingestion)
                    settings["ingestion_defaults"] = ingestion_defaults
            except Exception as e:
                # Flow lookup is best-effort: the settings payload is still
                # useful without ingestion defaults.
                logger.warning(f"Failed to fetch ingestion flow defaults: {e}")
                # Continue without ingestion defaults
        return JSONResponse(settings)
    except Exception as e:
        return JSONResponse(
            {"error": f"Failed to retrieve settings: {str(e)}"}, status_code=500
        )
async def update_settings(request, session_manager):
    """Update application settings.

    Accepts a JSON body containing any subset of the allowed fields,
    validates field names and types up front, runs provider validation
    when provider-related fields change (BEFORE mutating the config),
    persists the configuration, and best-effort mirrors the changes into
    the Langflow flows and global variables.

    Args:
        request: Incoming HTTP request; JSON body carries the updates.
        session_manager: Session manager dependency (not used by this handler).

    Returns:
        JSONResponse: success message, or 400/403/500 error payloads.
    """
    try:
        # Get current configuration
        current_config = get_openrag_config()
        # Check if config is marked as edited
        if not current_config.edited:
            return JSONResponse(
                {
                    "error": "Configuration must be marked as edited before updates are allowed"
                },
                status_code=403,
            )
        # Parse request body
        body = await request.json()
        # Validate allowed fields
        allowed_fields = {
            "llm_model",
            "system_prompt",
            "chunk_size",
            "chunk_overlap",
            "table_structure",
            "ocr",
            "picture_descriptions",
            "embedding_model",
            "model_provider",
            "api_key",
            "endpoint",
            "project_id",
        }
        # Check for invalid fields
        invalid_fields = set(body.keys()) - allowed_fields
        if invalid_fields:
            return JSONResponse(
                {
                    "error": f"Invalid fields: {', '.join(invalid_fields)}. Allowed fields: {', '.join(allowed_fields)}"
                },
                status_code=400,
            )
        # Validate types early before modifying config
        if "embedding_model" in body:
            if (
                not isinstance(body["embedding_model"], str)
                or not body["embedding_model"].strip()
            ):
                return JSONResponse(
                    {"error": "embedding_model must be a non-empty string"},
                    status_code=400,
                )
        if "table_structure" in body:
            if not isinstance(body["table_structure"], bool):
                return JSONResponse(
                    {"error": "table_structure must be a boolean"}, status_code=400
                )
        if "ocr" in body:
            if not isinstance(body["ocr"], bool):
                return JSONResponse(
                    {"error": "ocr must be a boolean"}, status_code=400
                )
        if "picture_descriptions" in body:
            if not isinstance(body["picture_descriptions"], bool):
                return JSONResponse(
                    {"error": "picture_descriptions must be a boolean"}, status_code=400
                )
        if "chunk_size" in body:
            if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
                return JSONResponse(
                    {"error": "chunk_size must be a positive integer"}, status_code=400
                )
        if "chunk_overlap" in body:
            if not isinstance(body["chunk_overlap"], int) or body["chunk_overlap"] < 0:
                return JSONResponse(
                    {"error": "chunk_overlap must be a non-negative integer"},
                    status_code=400,
                )
        if "model_provider" in body:
            if (
                not isinstance(body["model_provider"], str)
                or not body["model_provider"].strip()
            ):
                return JSONResponse(
                    {"error": "model_provider must be a non-empty string"},
                    status_code=400,
                )
        if "api_key" in body:
            # Empty string is allowed: it means "keep the stored key" (see below).
            if not isinstance(body["api_key"], str):
                return JSONResponse(
                    {"error": "api_key must be a string"}, status_code=400
                )
        if "endpoint" in body:
            if not isinstance(body["endpoint"], str) or not body["endpoint"].strip():
                return JSONResponse(
                    {"error": "endpoint must be a non-empty string"}, status_code=400
                )
        if "project_id" in body:
            if (
                not isinstance(body["project_id"], str)
                or not body["project_id"].strip()
            ):
                return JSONResponse(
                    {"error": "project_id must be a non-empty string"}, status_code=400
                )
        # Validate provider setup if provider-related fields are being updated
        # Do this BEFORE modifying any config
        provider_fields = ["model_provider", "api_key", "endpoint", "project_id", "llm_model", "embedding_model"]
        should_validate = any(field in body for field in provider_fields)
        if should_validate:
            try:
                logger.info("Running provider validation before modifying config")
                # Merge request values over the currently stored config so the
                # full candidate setup is validated, not just the delta.
                provider = body.get("model_provider", current_config.provider.model_provider)
                # Blank api_key means "keep using the stored key".
                api_key = body.get("api_key") if "api_key" in body and body["api_key"].strip() else current_config.provider.api_key
                endpoint = body.get("endpoint", current_config.provider.endpoint)
                project_id = body.get("project_id", current_config.provider.project_id)
                llm_model = body.get("llm_model", current_config.agent.llm_model)
                embedding_model = body.get("embedding_model", current_config.knowledge.embedding_model)
                await validate_provider_setup(
                    provider=provider,
                    api_key=api_key,
                    embedding_model=embedding_model,
                    llm_model=llm_model,
                    endpoint=endpoint,
                    project_id=project_id,
                )
                logger.info(f"Provider validation successful for {provider}")
            except Exception as e:
                # Surface the validation error verbatim to the client.
                logger.error(f"Provider validation failed: {str(e)}")
                return JSONResponse(
                    {"error": f"{str(e)}"},
                    status_code=400
                )
        # Update configuration
        # Only reached if validation passed or wasn't needed
        config_updated = False
        # Update agent settings
        if "llm_model" in body:
            current_config.agent.llm_model = body["llm_model"]
            config_updated = True
            # Also update the chat flow with the new model
            try:
                flows_service = _get_flows_service()
                await flows_service.update_chat_flow_model(body["llm_model"], current_config.provider.model_provider.lower())
                logger.info(
                    f"Successfully updated chat flow model to '{body['llm_model']}'"
                )
            except Exception as e:
                logger.error(f"Failed to update chat flow model: {str(e)}")
                # Don't fail the entire settings update if flow update fails
                # The config will still be saved
        if "system_prompt" in body:
            current_config.agent.system_prompt = body["system_prompt"]
            config_updated = True
            # Also update the chat flow with the new system prompt
            try:
                flows_service = _get_flows_service()
                await flows_service.update_chat_flow_system_prompt(
                    body["system_prompt"],
                    # NOTE(review): agent.system_prompt was already overwritten
                    # above, so both arguments carry the NEW prompt here.
                    # Presumably the second argument should be the PREVIOUS
                    # prompt — confirm against update_chat_flow_system_prompt's
                    # signature before relying on it.
                    current_config.agent.system_prompt
                )
                logger.info(f"Successfully updated chat flow system prompt")
            except Exception as e:
                logger.error(f"Failed to update chat flow system prompt: {str(e)}")
                # Don't fail the entire settings update if flow update fails
                # The config will still be saved
        # Update knowledge settings
        if "embedding_model" in body:
            new_embedding_model = body["embedding_model"].strip()
            current_config.knowledge.embedding_model = new_embedding_model
            config_updated = True
            # Also update the ingest flow with the new embedding model
            try:
                flows_service = _get_flows_service()
                await flows_service.update_ingest_flow_embedding_model(
                    new_embedding_model,
                    current_config.provider.model_provider.lower()
                )
                logger.info(
                    f"Successfully updated ingest flow embedding model to '{body['embedding_model'].strip()}'"
                )
                # Propagate the model change across all Langflow flows.
                provider = (
                    current_config.provider.model_provider.lower()
                    if current_config.provider.model_provider
                    else "openai"
                )
                endpoint = current_config.provider.endpoint or None
                llm_model = current_config.agent.llm_model
                change_result = await flows_service.change_langflow_model_value(
                    provider=provider,
                    embedding_model=new_embedding_model,
                    llm_model=llm_model,
                    endpoint=endpoint,
                )
                if not change_result.get("success", False):
                    logger.warning(
                        "Change embedding model across flows completed with issues",
                        provider=provider,
                        embedding_model=new_embedding_model,
                        change_result=change_result,
                    )
                else:
                    logger.info(
                        "Successfully updated embedding model across Langflow flows",
                        provider=provider,
                        embedding_model=new_embedding_model,
                    )
            except Exception as e:
                logger.error(f"Failed to update ingest flow embedding model: {str(e)}")
                # Don't fail the entire settings update if flow update fails
                # The config will still be saved
        if "table_structure" in body:
            current_config.knowledge.table_structure = body["table_structure"]
            config_updated = True
            # Also update the flow with the new docling settings
            try:
                flows_service = _get_flows_service()
                # Rebuild the full docling preset: new toggle + current values
                # of the other two toggles.
                preset_config = get_docling_preset_configs(
                    table_structure=body["table_structure"],
                    ocr=current_config.knowledge.ocr,
                    picture_descriptions=current_config.knowledge.picture_descriptions
                )
                await flows_service.update_flow_docling_preset("custom", preset_config)
                logger.info(f"Successfully updated table_structure setting in flow")
            except Exception as e:
                logger.error(f"Failed to update docling settings in flow: {str(e)}")
        if "ocr" in body:
            current_config.knowledge.ocr = body["ocr"]
            config_updated = True
            # Also update the flow with the new docling settings
            try:
                flows_service = _get_flows_service()
                preset_config = get_docling_preset_configs(
                    table_structure=current_config.knowledge.table_structure,
                    ocr=body["ocr"],
                    picture_descriptions=current_config.knowledge.picture_descriptions
                )
                await flows_service.update_flow_docling_preset("custom", preset_config)
                logger.info(f"Successfully updated ocr setting in flow")
            except Exception as e:
                logger.error(f"Failed to update docling settings in flow: {str(e)}")
        if "picture_descriptions" in body:
            current_config.knowledge.picture_descriptions = body["picture_descriptions"]
            config_updated = True
            # Also update the flow with the new docling settings
            try:
                flows_service = _get_flows_service()
                preset_config = get_docling_preset_configs(
                    table_structure=current_config.knowledge.table_structure,
                    ocr=current_config.knowledge.ocr,
                    picture_descriptions=body["picture_descriptions"]
                )
                await flows_service.update_flow_docling_preset("custom", preset_config)
                logger.info(f"Successfully updated picture_descriptions setting in flow")
            except Exception as e:
                logger.error(f"Failed to update docling settings in flow: {str(e)}")
        if "chunk_size" in body:
            current_config.knowledge.chunk_size = body["chunk_size"]
            config_updated = True
            # Also update the ingest flow with the new chunk size
            try:
                flows_service = _get_flows_service()
                await flows_service.update_ingest_flow_chunk_size(body["chunk_size"])
                logger.info(
                    f"Successfully updated ingest flow chunk size to {body['chunk_size']}"
                )
            except Exception as e:
                logger.error(f"Failed to update ingest flow chunk size: {str(e)}")
                # Don't fail the entire settings update if flow update fails
                # The config will still be saved
        if "chunk_overlap" in body:
            current_config.knowledge.chunk_overlap = body["chunk_overlap"]
            config_updated = True
            # Also update the ingest flow with the new chunk overlap
            try:
                flows_service = _get_flows_service()
                await flows_service.update_ingest_flow_chunk_overlap(
                    body["chunk_overlap"]
                )
                logger.info(
                    f"Successfully updated ingest flow chunk overlap to {body['chunk_overlap']}"
                )
            except Exception as e:
                logger.error(f"Failed to update ingest flow chunk overlap: {str(e)}")
                # Don't fail the entire settings update if flow update fails
                # The config will still be saved
        # Update provider settings
        if "model_provider" in body:
            current_config.provider.model_provider = body["model_provider"].strip()
            config_updated = True
        if "api_key" in body:
            # Only update if non-empty string (empty string means keep current value)
            if body["api_key"].strip():
                current_config.provider.api_key = body["api_key"]
                config_updated = True
        if "endpoint" in body:
            current_config.provider.endpoint = body["endpoint"].strip()
            config_updated = True
        if "project_id" in body:
            current_config.provider.project_id = body["project_id"].strip()
            config_updated = True
        if not config_updated:
            return JSONResponse(
                {"error": "No valid fields provided for update"}, status_code=400
            )
        # Save the updated configuration
        if not config_manager.save_config_file(current_config):
            return JSONResponse(
                {"error": "Failed to save configuration"}, status_code=500
            )
        # Update Langflow global variables if provider settings changed
        if any(key in body for key in ["model_provider", "api_key", "endpoint", "project_id"]):
            try:
                provider = current_config.provider.model_provider.lower() if current_config.provider.model_provider else "openai"
                # Set API key for IBM/Watson providers
                if (provider == "watsonx") and "api_key" in body:
                    api_key = body["api_key"]
                    await clients._create_langflow_global_variable(
                        "WATSONX_API_KEY", api_key, modify=True
                    )
                    logger.info("Set WATSONX_API_KEY global variable in Langflow")
                # Set project ID for IBM/Watson providers
                if (provider == "watsonx") and "project_id" in body:
                    project_id = body["project_id"]
                    await clients._create_langflow_global_variable(
                        "WATSONX_PROJECT_ID", project_id, modify=True
                    )
                    logger.info("Set WATSONX_PROJECT_ID global variable in Langflow")
                # Set API key for OpenAI provider
                if provider == "openai" and "api_key" in body:
                    api_key = body["api_key"]
                    await clients._create_langflow_global_variable(
                        "OPENAI_API_KEY", api_key, modify=True
                    )
                    logger.info("Set OPENAI_API_KEY global variable in Langflow")
                # Set base URL for Ollama provider
                if provider == "ollama" and "endpoint" in body:
                    # Rewrite localhost so the URL resolves from inside containers.
                    endpoint = transform_localhost_url(body["endpoint"])
                    await clients._create_langflow_global_variable(
                        "OLLAMA_BASE_URL", endpoint, modify=True
                    )
                    logger.info("Set OLLAMA_BASE_URL global variable in Langflow")
                # Update model values across flows if provider changed
                if "model_provider" in body:
                    flows_service = _get_flows_service()
                    await flows_service.change_langflow_model_value(
                        provider,
                        current_config.knowledge.embedding_model,
                        current_config.agent.llm_model,
                        current_config.provider.endpoint,
                    )
                    logger.info(f"Successfully updated Langflow flows for provider {provider}")
            except Exception as e:
                logger.error(f"Failed to update Langflow settings: {str(e)}")
                # Don't fail the entire settings update if Langflow update fails
                # The config was still saved
        logger.info(
            "Configuration updated successfully", updated_fields=list(body.keys())
        )
        return JSONResponse({"message": "Configuration updated successfully"})
    except Exception as e:
        logger.error("Failed to update settings", error=str(e))
        return JSONResponse(
            {"error": f"Failed to update settings: {str(e)}"}, status_code=500
        )
async def onboarding(request, flows_service):
    """Handle onboarding configuration setup.

    Validates and applies initial provider/knowledge/agent settings, runs
    provider validation, initializes the OpenSearch index, assigns the
    selected provider to the Langflow flows, optionally ingests sample
    data, and finally saves the configuration (which marks it as edited).

    Args:
        request: Incoming HTTP request; JSON body carries the onboarding fields.
        flows_service: Service used to update Langflow flows.

    Returns:
        JSONResponse: success payload with `edited`/`sample_data_ingested`
        flags, or a 400/500 error payload.
    """
    try:
        # Get current configuration
        current_config = get_openrag_config()
        # Warn if config was already edited (onboarding being re-run)
        if current_config.edited:
            logger.warning(
                "Onboarding is being run although configuration was already edited before"
            )
        # Parse request body
        body = await request.json()
        # Validate allowed fields
        allowed_fields = {
            "model_provider",
            "api_key",
            "embedding_model",
            "llm_model",
            "sample_data",
            "endpoint",
            "project_id",
        }
        # Check for invalid fields
        invalid_fields = set(body.keys()) - allowed_fields
        if invalid_fields:
            return JSONResponse(
                {
                    "error": f"Invalid fields: {', '.join(invalid_fields)}. Allowed fields: {', '.join(allowed_fields)}"
                },
                status_code=400,
            )
        # Update configuration
        config_updated = False
        # Update provider settings
        if "model_provider" in body:
            if (
                not isinstance(body["model_provider"], str)
                or not body["model_provider"].strip()
            ):
                return JSONResponse(
                    {"error": "model_provider must be a non-empty string"},
                    status_code=400,
                )
            current_config.provider.model_provider = body["model_provider"].strip()
            config_updated = True
        if "api_key" in body:
            if not isinstance(body["api_key"], str):
                return JSONResponse(
                    {"error": "api_key must be a string"}, status_code=400
                )
            current_config.provider.api_key = body["api_key"]
            config_updated = True
        # Update knowledge settings
        # (skipped entirely when Langflow-based ingestion is disabled)
        if "embedding_model" in body and not DISABLE_INGEST_WITH_LANGFLOW:
            if (
                not isinstance(body["embedding_model"], str)
                or not body["embedding_model"].strip()
            ):
                return JSONResponse(
                    {"error": "embedding_model must be a non-empty string"},
                    status_code=400,
                )
            current_config.knowledge.embedding_model = body["embedding_model"].strip()
            config_updated = True
        # Update agent settings
        if "llm_model" in body:
            if not isinstance(body["llm_model"], str) or not body["llm_model"].strip():
                return JSONResponse(
                    {"error": "llm_model must be a non-empty string"}, status_code=400
                )
            current_config.agent.llm_model = body["llm_model"].strip()
            config_updated = True
        if "endpoint" in body:
            if not isinstance(body["endpoint"], str) or not body["endpoint"].strip():
                return JSONResponse(
                    {"error": "endpoint must be a non-empty string"}, status_code=400
                )
            current_config.provider.endpoint = body["endpoint"].strip()
            config_updated = True
        if "project_id" in body:
            if (
                not isinstance(body["project_id"], str)
                or not body["project_id"].strip()
            ):
                return JSONResponse(
                    {"error": "project_id must be a non-empty string"}, status_code=400
                )
            current_config.provider.project_id = body["project_id"].strip()
            config_updated = True
        # Handle sample_data
        should_ingest_sample_data = False
        if "sample_data" in body:
            if not isinstance(body["sample_data"], bool):
                return JSONResponse(
                    {"error": "sample_data must be a boolean value"}, status_code=400
                )
            should_ingest_sample_data = body["sample_data"]
        if not config_updated:
            return JSONResponse(
                {"error": "No valid fields provided for update"}, status_code=400
            )
        # Validate provider setup before initializing OpenSearch index
        try:
            # Local import mirrors the module-level one; kept for safety
            # against circular imports.
            from api.provider_validation import validate_provider_setup
            provider = current_config.provider.model_provider.lower() if current_config.provider.model_provider else "openai"
            logger.info(f"Validating provider setup for {provider}")
            await validate_provider_setup(
                provider=provider,
                api_key=current_config.provider.api_key,
                embedding_model=current_config.knowledge.embedding_model,
                llm_model=current_config.agent.llm_model,
                endpoint=current_config.provider.endpoint,
                project_id=current_config.provider.project_id,
            )
            logger.info(f"Provider setup validation completed successfully for {provider}")
        except Exception as e:
            # Validation failure aborts onboarding before any index creation.
            logger.error(f"Provider validation failed: {str(e)}")
            return JSONResponse(
                {"error": str(e)},
                status_code=400,
            )
        # Initialize the OpenSearch index now that we have the embedding model configured
        try:
            # Import here to avoid circular imports
            from main import init_index
            logger.info(
                "Initializing OpenSearch index after onboarding configuration"
            )
            await init_index()
            logger.info("OpenSearch index initialization completed successfully")
        except Exception as e:
            # ValueError is treated as a user-fixable configuration problem
            # and returned as a 400; anything else is logged and tolerated.
            if isinstance(e, ValueError):
                logger.error(
                    "Failed to initialize OpenSearch index after onboarding",
                    error=str(e),
                )
                return JSONResponse(
                    {
                        "error": str(e),
                        "edited": True,
                    },
                    status_code=400,
                )
            logger.error(
                "Failed to initialize OpenSearch index after onboarding",
                error=str(e),
            )
            # Don't fail the entire onboarding process if index creation fails
            # The application can still work, but document operations may fail
        # Save the updated configuration (this will mark it as edited)
        # If model_provider was updated, assign the new provider to flows
        if "model_provider" in body:
            provider = body["model_provider"].strip().lower()
            try:
                flow_result = await flows_service.assign_model_provider(provider)
                if flow_result.get("success"):
                    logger.info(
                        f"Successfully assigned {provider} to flows",
                        flow_result=flow_result,
                    )
                else:
                    logger.warning(
                        f"Failed to assign {provider} to flows",
                        flow_result=flow_result,
                    )
                    # Continue even if flow assignment fails - configuration was still saved
            except Exception as e:
                logger.error(
                    "Error assigning model provider to flows",
                    provider=provider,
                    error=str(e),
                )
                # Unlike flow-sync failures elsewhere, this is fatal for onboarding.
                raise
            # Set Langflow global variables based on provider
            try:
                # Set API key for IBM/Watson providers
                if (provider == "watsonx") and "api_key" in body:
                    api_key = body["api_key"]
                    await clients._create_langflow_global_variable(
                        "WATSONX_API_KEY", api_key, modify=True
                    )
                    logger.info("Set WATSONX_API_KEY global variable in Langflow")
                # Set project ID for IBM/Watson providers
                if (provider == "watsonx") and "project_id" in body:
                    project_id = body["project_id"]
                    await clients._create_langflow_global_variable(
                        "WATSONX_PROJECT_ID", project_id, modify=True
                    )
                    logger.info(
                        "Set WATSONX_PROJECT_ID global variable in Langflow"
                    )
                # Set API key for OpenAI provider
                if provider == "openai" and "api_key" in body:
                    api_key = body["api_key"]
                    await clients._create_langflow_global_variable(
                        "OPENAI_API_KEY", api_key, modify=True
                    )
                    logger.info("Set OPENAI_API_KEY global variable in Langflow")
                # Set base URL for Ollama provider
                if provider == "ollama" and "endpoint" in body:
                    # Rewrite localhost so the URL resolves from inside containers.
                    endpoint = transform_localhost_url(body["endpoint"])
                    await clients._create_langflow_global_variable(
                        "OLLAMA_BASE_URL", endpoint, modify=True
                    )
                    logger.info("Set OLLAMA_BASE_URL global variable in Langflow")
                await flows_service.change_langflow_model_value(
                    provider,
                    body.get("embedding_model"),
                    body.get("llm_model"),
                    body.get("endpoint"),
                )
            except Exception as e:
                logger.error(
                    "Failed to set Langflow global variables",
                    provider=provider,
                    error=str(e),
                )
                raise
        # Handle sample data ingestion if requested
        if should_ingest_sample_data:
            try:
                # Import the function here to avoid circular imports
                from main import ingest_default_documents_when_ready
                # Get services from the current app state
                # We need to access the app instance to get services
                app = request.scope.get("app")
                if app and hasattr(app.state, "services"):
                    services = app.state.services
                    logger.info(
                        "Starting sample data ingestion as requested in onboarding"
                    )
                    await ingest_default_documents_when_ready(services)
                    logger.info("Sample data ingestion completed successfully")
                else:
                    logger.error(
                        "Could not access services for sample data ingestion"
                    )
            except Exception as e:
                logger.error(
                    "Failed to complete sample data ingestion", error=str(e)
                )
                # Don't fail the entire onboarding process if sample data fails
        if config_manager.save_config_file(current_config):
            updated_fields = [
                k for k in body.keys() if k != "sample_data"
            ]  # Exclude sample_data from log
            logger.info(
                "Onboarding configuration updated successfully",
                updated_fields=updated_fields,
            )
        else:
            return JSONResponse(
                {"error": "Failed to save configuration"}, status_code=500
            )
        return JSONResponse(
            {
                "message": "Onboarding configuration updated successfully",
                "edited": True,  # Confirm that config is now marked as edited
                "sample_data_ingested": should_ingest_sample_data,
            }
        )
    except Exception as e:
        logger.error("Failed to update onboarding settings", error=str(e))
        return JSONResponse(
            {"error": str(e)},
            status_code=500,
        )
def _get_flows_service():
    """Return a fresh FlowsService instance.

    The import is deferred to call time to avoid a circular import with
    the services package.
    """
    from services.flows_service import FlowsService

    service = FlowsService()
    return service
async def update_docling_preset(request, session_manager):
    """Update docling settings in the ingest flow - deprecated endpoint, use /settings instead"""
    try:
        payload = await request.json()

        # Legacy preset names map onto the newer toggle trio for backwards
        # compatibility with the old preset-based API.
        legacy_presets = {
            "standard": {"table_structure": False, "ocr": False, "picture_descriptions": False},
            "ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False},
            "picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True},
            "VLM": {"table_structure": False, "ocr": False, "picture_descriptions": False},
        }

        if "preset" in payload:
            preset_name = payload["preset"]
            if preset_name not in legacy_presets:
                return JSONResponse(
                    {"error": f"Invalid preset '{preset_name}'. Valid presets: {', '.join(legacy_presets.keys())}"},
                    status_code=400,
                )
            settings = legacy_presets[preset_name]
        else:
            # New toggle-based API: missing toggles default to off.
            settings = {
                toggle: payload.get(toggle, False)
                for toggle in ("table_structure", "ocr", "picture_descriptions")
            }

        # Build the docling configuration and push it into the ingest flow.
        preset_config = get_docling_preset_configs(**settings)
        flows_service = _get_flows_service()
        await flows_service.update_flow_docling_preset("custom", preset_config)

        logger.info(f"Successfully updated docling settings in ingest flow")
        return JSONResponse(
            {
                "message": f"Successfully updated docling settings",
                "settings": settings,
                "preset_config": preset_config,
            }
        )
    except Exception as e:
        logger.error("Failed to update docling settings", error=str(e))
        return JSONResponse(
            {"error": f"Failed to update docling settings: {str(e)}"}, status_code=500
        )