Update docling preset options

2025-09-29 16:40:29 -05:00 · 2025-09-29 16:40:29 -05:00 · 2341bf4700
commit 2341bf4700
parent 8933131b4b
7 changed files with 211 additions and 153 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -21,7 +21,9 @@ knowledge:
  # Overlap between chunks
  chunk_overlap: 200
  # Docling preset setting
-  doclingPresets: standard
+  ocr: false
+  picture_descriptions: false
+  table_structure: false

 # AI agent configuration
 agent:
--- a/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts
+++ b/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts
@@ -8,7 +8,9 @@ interface UpdateFlowSettingVariables {
  llm_model?: string;
  system_prompt?: string;
  embedding_model?: string;
-  doclingPresets?: string;
+  table_structure?: boolean;
+  ocr?: boolean;
+  picture_descriptions?: boolean;
  chunk_size?: number;
  chunk_overlap?: number;
 }
--- a/frontend/src/app/api/queries/useGetSettingsQuery.ts
+++ b/frontend/src/app/api/queries/useGetSettingsQuery.ts
@@ -13,7 +13,9 @@ export interface KnowledgeSettings {
  embedding_model?: string;
  chunk_size?: number;
  chunk_overlap?: number;
-  doclingPresets?: string;
+  table_structure?: boolean;
+  ocr?: boolean;
+  picture_descriptions?: boolean;
 }

 export interface Settings {
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@@ -22,9 +22,9 @@ import {
  CardTitle,
 } from "@/components/ui/card";
 import { Checkbox } from "@/components/ui/checkbox";
+import { Switch } from "@/components/ui/switch";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
-import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
 import {
  Select,
  SelectContent,
@@ -39,11 +39,6 @@ import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from
 import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
 import { ModelSelectItems } from "./helpers/model-select-item";
 import { LabelWrapper } from "@/components/label-wrapper";
-import {
-  Tooltip,
-  TooltipContent,
-  TooltipTrigger,
-} from "@radix-ui/react-tooltip";

 const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS;

@@ -112,7 +107,9 @@ function KnowledgeSourcesPage() {
  const [systemPrompt, setSystemPrompt] = useState<string>("");
  const [chunkSize, setChunkSize] = useState<number>(1024);
  const [chunkOverlap, setChunkOverlap] = useState<number>(50);
-  const [processingMode, setProcessingMode] = useState<string>("standard");
+  const [tableStructure, setTableStructure] = useState<boolean>(false);
+  const [ocr, setOcr] = useState<boolean>(false);
+  const [pictureDescriptions, setPictureDescriptions] = useState<boolean>(false);

  // Fetch settings using React Query
  const { data: settings = {} } = useGetSettingsQuery({
@@ -195,12 +192,24 @@ function KnowledgeSourcesPage() {
    }
  }, [settings.knowledge?.chunk_overlap]);

-  // Sync processing mode with settings data
+  // Sync docling settings with settings data
  useEffect(() => {
-    if (settings.knowledge?.doclingPresets) {
-      setProcessingMode(settings.knowledge.doclingPresets);
+    if (settings.knowledge?.table_structure !== undefined) {
+      setTableStructure(settings.knowledge.table_structure);
    }
-  }, [settings.knowledge?.doclingPresets]);
+  }, [settings.knowledge?.table_structure]);
+
+  useEffect(() => {
+    if (settings.knowledge?.ocr !== undefined) {
+      setOcr(settings.knowledge.ocr);
+    }
+  }, [settings.knowledge?.ocr]);
+
+  useEffect(() => {
+    if (settings.knowledge?.picture_descriptions !== undefined) {
+      setPictureDescriptions(settings.knowledge.picture_descriptions);
+    }
+  }, [settings.knowledge?.picture_descriptions]);

  // Update model selection immediately
  const handleModelChange = (newModel: string) => {
@@ -231,11 +240,20 @@ function KnowledgeSourcesPage() {
    debouncedUpdate({ chunk_overlap: numValue });
  };

-  // Update processing mode
-  const handleProcessingModeChange = (mode: string) => {
-    setProcessingMode(mode);
-    // Update the configuration setting (backend will also update the flow automatically)
-    debouncedUpdate({ doclingPresets: mode });
+  // Update docling settings
+  const handleTableStructureChange = (checked: boolean) => {
+    setTableStructure(checked);
+    updateFlowSettingMutation.mutate({ table_structure: checked });
+  };
+
+  const handleOcrChange = (checked: boolean) => {
+    setOcr(checked);
+    updateFlowSettingMutation.mutate({ ocr: checked });
+  };
+
+  const handlePictureDescriptionsChange = (checked: boolean) => {
+    setPictureDescriptions(checked);
+    updateFlowSettingMutation.mutate({ picture_descriptions: checked });
  };

  // Helper function to get connector icon
@@ -569,7 +587,9 @@ function KnowledgeSourcesPage() {
        // Only reset form values if the API call was successful
        setChunkSize(DEFAULT_KNOWLEDGE_SETTINGS.chunk_size);
        setChunkOverlap(DEFAULT_KNOWLEDGE_SETTINGS.chunk_overlap);
-        setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode);
+        setTableStructure(false);
+        setOcr(false);
+        setPictureDescriptions(false);
        closeDialog(); // Close after successful completion
      })
      .catch((error) => {
@@ -1064,75 +1084,60 @@ function KnowledgeSourcesPage() {
              </div>
            </div>
            <div className="space-y-3">
-              <Label className="text-base font-medium">Ingestion presets</Label>
-              <RadioGroup
-                value={processingMode}
-                onValueChange={handleProcessingModeChange}
-                className="space-y-3"
-              >
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem value="standard" id="standard" />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="standard"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      No OCR
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Fast ingest for documents with selectable text. Images are
-                      ignored.
-                    </div>
+              <div className="flex items-center justify-between">
+                <div className="flex-1">
+                  <Label
+                    htmlFor="table-structure"
+                    className="text-base font-medium cursor-pointer pb-3"
+                  >
+                    Table Structure
+                  </Label>
+                  <div className="text-sm text-muted-foreground">
+                    Capture table structure during ingest.
                  </div>
                </div>
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem value="ocr" id="ocr" />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="ocr"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      OCR
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Extracts text from images and scanned pages.
-                    </div>
+                <Switch
+                  id="table-structure"
+                  checked={tableStructure}
+                  onCheckedChange={handleTableStructureChange}
+                />
+              </div>
+              <div className="flex items-center justify-between">
+                <div className="flex-1">
+                  <Label
+                    htmlFor="ocr"
+                    className="text-base font-medium cursor-pointer pb-3"
+                  >
+                    OCR
+                  </Label>
+                  <div className="text-sm text-muted-foreground">
+                    Extracts text from images/PDFs. Ingest is slower when enabled.
                  </div>
                </div>
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem
-                    value="picture_description"
-                    id="picture_description"
-                  />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="picture_description"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      OCR + Captions
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Extracts text from images and scanned pages. Generates
-                      short image captions.
-                    </div>
+                <Switch
+                  id="ocr"
+                  checked={ocr}
+                  onCheckedChange={handleOcrChange}
+                />
+              </div>
+              <div className="flex items-center justify-between">
+                <div className="flex-1">
+                  <Label
+                    htmlFor="picture-descriptions"
+                    className="text-base font-medium cursor-pointer pb-3"
+                  >
+                    Picture Descriptions
+                  </Label>
+                  <div className="text-sm text-muted-foreground">
+                    Adds captions for images. Ingest is slower when enabled.
                  </div>
                </div>
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem value="VLM" id="VLM" />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="VLM"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      VLM
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Extracts text from layout-aware parsing of text, tables,
-                      and sections.
-                    </div>
-                  </div>
-                </div>
-              </RadioGroup>
+                <Switch
+                  id="picture-descriptions"
+                  checked={pictureDescriptions}
+                  onCheckedChange={handlePictureDescriptionsChange}
+                />
+              </div>
            </div>
          </div>
        </CardContent>
--- a/frontend/src/lib/constants.ts
+++ b/frontend/src/lib/constants.ts
@@ -12,7 +12,9 @@ export const DEFAULT_AGENT_SETTINGS = {
 export const DEFAULT_KNOWLEDGE_SETTINGS = {
  chunk_size: 1000,
  chunk_overlap: 200,
-  processing_mode: "standard"
+  table_structure: false,
+  ocr: false,
+  picture_descriptions: false
 } as const;

 /**
--- a/src/api/settings.py
+++ b/src/api/settings.py
@@ -17,35 +17,30 @@ logger = get_logger(__name__)


 # Docling preset configurations
-def get_docling_preset_configs():
-    """Get docling preset configurations with platform-specific settings"""
+def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False):
+    """Get docling preset configurations based on toggle settings
+
+    Args:
+        table_structure: Enable table structure parsing (default: False)
+        ocr: Enable OCR for text extraction from images (default: False)
+        picture_descriptions: Enable picture descriptions/captions (default: False)
+    """
    is_macos = platform.system() == "Darwin"

-    return {
-        "standard": {"do_ocr": False},
-        "ocr": {"do_ocr": True, "ocr_engine": "ocrmac" if is_macos else "easyocr"},
-        "picture_description": {
-            "do_ocr": True,
-            "ocr_engine": "ocrmac" if is_macos else "easyocr",
-            "do_picture_classification": True,
-            "do_picture_description": True,
-            "picture_description_local": {
-                "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
-                "prompt": "Describe this image in a few sentences.",
-            },
-        },
-        "VLM": {
-            "pipeline": "vlm",
-            "vlm_pipeline_model_local": {
-                "repo_id": "ds4sd/SmolDocling-256M-preview-mlx-bf16"
-                if is_macos
-                else "ds4sd/SmolDocling-256M-preview",
-                "response_format": "doctags",
-                "inference_framework": "mlx",
-            },
-        },
+    config = {
+        "do_ocr": ocr,
+        "ocr_engine": "ocrmac" if is_macos else "easyocr",
+        "do_table_structure": table_structure,
+        "do_picture_classification": picture_descriptions,
+        "do_picture_description": picture_descriptions,
+        "picture_description_local": {
+            "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
+            "prompt": "Describe this image in a few sentences.",
+        }
    }

+    return config
+

 async def get_settings(request, session_manager):
    """Get application settings"""
@@ -71,7 +66,9 @@ async def get_settings(request, session_manager):
                "embedding_model": knowledge_config.embedding_model,
                "chunk_size": knowledge_config.chunk_size,
                "chunk_overlap": knowledge_config.chunk_overlap,
-                "doclingPresets": knowledge_config.doclingPresets,
+                "table_structure": knowledge_config.table_structure,
+                "ocr": knowledge_config.ocr,
+                "picture_descriptions": knowledge_config.picture_descriptions,
            },
            "agent": {
                "llm_model": agent_config.llm_model,
@@ -178,7 +175,9 @@ async def update_settings(request, session_manager):
            "system_prompt",
            "chunk_size",
            "chunk_overlap",
-            "doclingPresets",
+            "table_structure",
+            "ocr",
+            "picture_descriptions",
            "embedding_model",
        }

@@ -255,32 +254,68 @@ async def update_settings(request, session_manager):
                # Don't fail the entire settings update if flow update fails
                # The config will still be saved

-        if "doclingPresets" in body:
-            preset_configs = get_docling_preset_configs()
-            valid_presets = list(preset_configs.keys())
-            if body["doclingPresets"] not in valid_presets:
+        if "table_structure" in body:
+            if not isinstance(body["table_structure"], bool):
                return JSONResponse(
-                    {
-                        "error": f"doclingPresets must be one of: {', '.join(valid_presets)}"
-                    },
-                    status_code=400,
+                    {"error": "table_structure must be a boolean"}, status_code=400
                )
-            current_config.knowledge.doclingPresets = body["doclingPresets"]
+            current_config.knowledge.table_structure = body["table_structure"]
            config_updated = True

-            # Also update the flow with the new docling preset
+            # Also update the flow with the new docling settings
            try:
                flows_service = _get_flows_service()
-                await flows_service.update_flow_docling_preset(
-                    body["doclingPresets"], preset_configs[body["doclingPresets"]]
-                )
-                logger.info(
-                    f"Successfully updated docling preset in flow to '{body['doclingPresets']}'"
+                preset_config = get_docling_preset_configs(
+                    table_structure=body["table_structure"],
+                    ocr=current_config.knowledge.ocr,
+                    picture_descriptions=current_config.knowledge.picture_descriptions
                )
+                await flows_service.update_flow_docling_preset("custom", preset_config)
+                logger.info(f"Successfully updated table_structure setting in flow")
            except Exception as e:
-                logger.error(f"Failed to update docling preset in flow: {str(e)}")
-                # Don't fail the entire settings update if flow update fails
-                # The config will still be saved
+                logger.error(f"Failed to update docling settings in flow: {str(e)}")
+
+        if "ocr" in body:
+            if not isinstance(body["ocr"], bool):
+                return JSONResponse(
+                    {"error": "ocr must be a boolean"}, status_code=400
+                )
+            current_config.knowledge.ocr = body["ocr"]
+            config_updated = True
+
+            # Also update the flow with the new docling settings
+            try:
+                flows_service = _get_flows_service()
+                preset_config = get_docling_preset_configs(
+                    table_structure=current_config.knowledge.table_structure,
+                    ocr=body["ocr"],
+                    picture_descriptions=current_config.knowledge.picture_descriptions
+                )
+                await flows_service.update_flow_docling_preset("custom", preset_config)
+                logger.info(f"Successfully updated ocr setting in flow")
+            except Exception as e:
+                logger.error(f"Failed to update docling settings in flow: {str(e)}")
+
+        if "picture_descriptions" in body:
+            if not isinstance(body["picture_descriptions"], bool):
+                return JSONResponse(
+                    {"error": "picture_descriptions must be a boolean"}, status_code=400
+                )
+            current_config.knowledge.picture_descriptions = body["picture_descriptions"]
+            config_updated = True
+
+            # Also update the flow with the new docling settings
+            try:
+                flows_service = _get_flows_service()
+                preset_config = get_docling_preset_configs(
+                    table_structure=current_config.knowledge.table_structure,
+                    ocr=current_config.knowledge.ocr,
+                    picture_descriptions=body["picture_descriptions"]
+                )
+                await flows_service.update_flow_docling_preset("custom", preset_config)
+                logger.info(f"Successfully updated picture_descriptions setting in flow")
+            except Exception as e:
+                logger.error(f"Failed to update docling settings in flow: {str(e)}")

        if "chunk_size" in body:
            if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
@@ -624,48 +659,56 @@ def _get_flows_service():


 async def update_docling_preset(request, session_manager):
-    """Update docling preset in the ingest flow"""
+    """Update docling settings in the ingest flow - deprecated endpoint, use /settings instead"""
    try:
        # Parse request body
        body = await request.json()

-        # Validate preset parameter
-        if "preset" not in body:
-            return JSONResponse(
-                {"error": "preset parameter is required"}, status_code=400
-            )
+        # Support old preset-based API for backwards compatibility
+        if "preset" in body:
+            # Map old presets to new toggle settings
+            preset_map = {
+                "standard": {"table_structure": False, "ocr": False, "picture_descriptions": False},
+                "ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False},
+                "picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True},
+                "VLM": {"table_structure": False, "ocr": False, "picture_descriptions": False},
+            }

-        preset = body["preset"]
-        preset_configs = get_docling_preset_configs()
+            preset = body["preset"]
+            if preset not in preset_map:
+                return JSONResponse(
+                    {"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_map.keys())}"},
+                    status_code=400,
+                )

-        if preset not in preset_configs:
-            valid_presets = list(preset_configs.keys())
-            return JSONResponse(
-                {
-                    "error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"
-                },
-                status_code=400,
-            )
+            settings = preset_map[preset]
+        else:
+            # Support new toggle-based API
+            settings = {
+                "table_structure": body.get("table_structure", False),
+                "ocr": body.get("ocr", False),
+                "picture_descriptions": body.get("picture_descriptions", False),
+            }

        # Get the preset configuration
-        preset_config = preset_configs[preset]
+        preset_config = get_docling_preset_configs(**settings)

        # Use the helper function to update the flow
        flows_service = _get_flows_service()
-        await flows_service.update_flow_docling_preset(preset, preset_config)
+        await flows_service.update_flow_docling_preset("custom", preset_config)

-        logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
+        logger.info(f"Successfully updated docling settings in ingest flow")

        return JSONResponse(
            {
-                "message": f"Successfully updated docling preset to '{preset}'",
-                "preset": preset,
+                "message": f"Successfully updated docling settings",
+                "settings": settings,
                "preset_config": preset_config,
            }
        )

    except Exception as e:
-        logger.error("Failed to update docling preset", error=str(e))
+        logger.error("Failed to update docling settings", error=str(e))
        return JSONResponse(
-            {"error": f"Failed to update docling preset: {str(e)}"}, status_code=500
+            {"error": f"Failed to update docling settings: {str(e)}"}, status_code=500
        )
--- a/src/config/config_manager.py
+++ b/src/config/config_manager.py
@@ -27,7 +27,9 @@ class KnowledgeConfig:
    embedding_model: str = "text-embedding-3-small"
    chunk_size: int = 1000
    chunk_overlap: int = 200
-    doclingPresets: str = "standard"
+    table_structure: bool = False
+    ocr: bool = False
+    picture_descriptions: bool = False


@dataclass