From e67fc21a9bcc46931ff97527eb90efa6feb0e1ec Mon Sep 17 00:00:00 2001
From: Mike Fortman
Date: Mon, 22 Sep 2025 16:58:12 -0500
Subject: [PATCH] update flow based on docling presets

---
Note: the index blob ids below are carried over from the original patch and
do not reflect the reviewed content of src/api/settings.py.

 src/api/settings.py    | 132 ++++++++++++++++++++++++++++++++++++++++++
 src/config/settings.py |   3 ++
 src/main.py            |  13 +++++-
 3 files changed, 147 insertions(+), 1 deletion(-)

diff --git a/src/api/settings.py b/src/api/settings.py
index c169b263..9723cdeb 100644
--- a/src/api/settings.py
+++ b/src/api/settings.py
@@ -7,6 +7,7 @@ from config.settings import (
     LANGFLOW_CHAT_FLOW_ID,
     LANGFLOW_INGEST_FLOW_ID,
     LANGFLOW_PUBLIC_URL,
+    DOCLING_COMPONENT_ID,
     clients,
     get_openrag_config,
     config_manager,
@@ -234,6 +235,15 @@ async def update_settings(request, session_manager):
             current_config.knowledge.doclingPresets = body["doclingPresets"]
             config_updated = True
 
+            # Also update the flow with the new docling preset
+            try:
+                await _update_flow_docling_preset(body["doclingPresets"], preset_configs[body["doclingPresets"]])
+                logger.info(f"Successfully updated docling preset in flow to '{body['doclingPresets']}'")
+            except Exception as e:
+                logger.error(f"Failed to update docling preset in flow: {str(e)}")
+                # Don't fail the entire settings update if flow update fails
+                # The config will still be saved
+
         if "chunk_size" in body:
             if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
                 return JSONResponse(
@@ -527,3 +537,125 @@ async def onboarding(request, flows_service):
             {"error": f"Failed to update onboarding settings: {str(e)}"},
             status_code=500,
         )
+
+
+async def _update_flow_docling_preset(preset: str, preset_config: dict):
+    """Write a docling preset's options into the Langflow ingest flow.
+
+    Fetches the flow identified by LANGFLOW_INGEST_FLOW_ID, finds the node
+    whose id equals DOCLING_COMPONENT_ID, replaces the ``value`` of that
+    node's ``docling_serve_opts`` template field with ``preset_config`` and
+    sends the whole flow back with a PATCH request.
+
+    Args:
+        preset: Name of the preset being applied. Not used by the update
+            itself; kept so callers can pass the name with its config.
+        preset_config: Options stored as the new ``docling_serve_opts`` value.
+
+    Raises:
+        ValueError: If LANGFLOW_INGEST_FLOW_ID is not configured.
+        Exception: If a Langflow request does not return HTTP 200, the
+            docling node is not in the flow, or the node has no usable
+            ``docling_serve_opts`` field.
+    """
+    if not LANGFLOW_INGEST_FLOW_ID:
+        raise ValueError("LANGFLOW_INGEST_FLOW_ID is not configured")
+
+    flow_path = f"/api/v1/flows/{LANGFLOW_INGEST_FLOW_ID}"
+
+    # Get the current flow data from Langflow
+    response = await clients.langflow_request("GET", flow_path)
+    if response.status_code != 200:
+        raise Exception(f"Failed to get ingest flow: HTTP {response.status_code} - {response.text}")
+
+    flow_data = response.json()
+
+    # Find the docling node by the component id taken from settings
+    nodes = flow_data.get("data", {}).get("nodes", [])
+    target_node = next(
+        (node for node in nodes if node.get("id") == DOCLING_COMPONENT_ID),
+        None,
+    )
+    if target_node is None:
+        raise Exception(f"Docling component '{DOCLING_COMPONENT_ID}' not found in ingest flow")
+
+    # target_node is the dict stored inside flow_data, so changing the field
+    # here also changes the payload that is sent back below.
+    template = target_node.get("data", {}).get("node", {}).get("template", {})
+    opts_field = template.get("docling_serve_opts")
+    if not isinstance(opts_field, dict):
+        # Covers a missing field and one that cannot hold a "value" key.
+        raise Exception(f"docling_serve_opts field not found in node '{DOCLING_COMPONENT_ID}'")
+    opts_field["value"] = preset_config
+
+    # Update the flow via PATCH request
+    patch_response = await clients.langflow_request("PATCH", flow_path, json=flow_data)
+    if patch_response.status_code != 200:
+        raise Exception(f"Failed to update ingest flow: HTTP {patch_response.status_code} - {patch_response.text}")
+
+
+async def update_docling_preset(request, session_manager):
+    """Apply a docling preset to the ingest flow.
+
+    Served as ``PATCH /settings/docling-preset``. The request body must be a
+    JSON object with a ``preset`` key naming one of the presets returned by
+    get_docling_preset_configs().
+
+    Args:
+        request: Incoming request whose JSON body carries ``preset``.
+        session_manager: Bound by the route definition; not used here.
+
+    Returns:
+        JSONResponse with status 200 and the applied preset on success, 400
+        when the body or preset is invalid, 500 when the flow update fails.
+    """
+    try:
+        # A malformed body is a client error, so answer 400 rather than
+        # letting it reach the generic 500 handler below. JSON decoding
+        # errors are ValueError subclasses.
+        try:
+            body = await request.json()
+        except ValueError:
+            return JSONResponse(
+                {"error": "Request body must be valid JSON"},
+                status_code=400
+            )
+
+        # Validate preset parameter
+        if not isinstance(body, dict) or "preset" not in body:
+            return JSONResponse(
+                {"error": "preset parameter is required"},
+                status_code=400
+            )
+
+        preset = body["preset"]
+        preset_configs = get_docling_preset_configs()
+
+        # The isinstance check keeps unhashable values (lists, objects)
+        # from raising TypeError in the lookup and turning into a 500.
+        if not isinstance(preset, str) or preset not in preset_configs:
+            return JSONResponse(
+                {"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_configs)}"},
+                status_code=400
+            )
+
+        # Get the preset configuration
+        preset_config = preset_configs[preset]
+
+        # Use the helper function to update the flow
+        await _update_flow_docling_preset(preset, preset_config)
+
+        logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
+
+        return JSONResponse({
+            "message": f"Successfully updated docling preset to '{preset}'",
+            "preset": preset,
+            "preset_config": preset_config
+        })
+
+    except Exception as e:
+        logger.error("Failed to update docling preset", error=str(e))
+        return JSONResponse(
+            {"error": f"Failed to update docling preset: {str(e)}"},
+            status_code=500
+        )
diff --git a/src/config/settings.py b/src/config/settings.py
index 66f78ce5..11e4b835 100644
--- a/src/config/settings.py
+++ b/src/config/settings.py
@@ -528,6 +528,9 @@ OLLAMA_EMBEDDING_COMPONENT_ID = os.getenv(
 )
 OLLAMA_LLM_COMPONENT_ID = os.getenv("OLLAMA_LLM_COMPONENT_ID", "OllamaModel-eCsJx")
 
+# Docling component ID for ingest flow
+DOCLING_COMPONENT_ID = os.getenv("DOCLING_COMPONENT_ID", "DoclingRemote-78KoX")
+
 # Global clients instance
 clients = AppClients()
 
diff --git a/src/main.py b/src/main.py
index e7cca718..f78e07bc 100644
--- a/src/main.py
+++ b/src/main.py
@@ -971,12 +971,23 @@ async def create_app():
                 "/onboarding",
                 require_auth(services["session_manager"])(
                     partial(
-                        settings.onboarding,
+                        settings.onboarding,
                         flows_service=services["flows_service"]
                     )
                 ),
                 methods=["POST"],
             ),
+            # Docling preset update endpoint
+            Route(
+                "/settings/docling-preset",
+                require_auth(services["session_manager"])(
+                    partial(
+                        settings.update_docling_preset,
+                        session_manager=services["session_manager"]
+                    )
+                ),
+                methods=["PATCH"],
+            ),
             Route(
                 "/nudges",
                 require_auth(services["session_manager"])(