Merge branch 'main' into feat/filters-design-sweep

2025-09-30 13:55:07 -05:00 · 2025-09-30 13:55:07 -05:00 · f6ae18f2bd
commit f6ae18f2bd
parent 60cb732ce2 f54479cf48
12 changed files with 322 additions and 212 deletions
--- a/README.md
+++ b/README.md
@ -138,7 +138,7 @@ podman machine start

 ### Common Issues

-See common issues and fixes: [docs/reference/troubleshooting.mdx](docs/docs/reference/troubleshooting.mdx)
+See common issues and fixes: [docs/docs/support/troubleshoot.mdx](docs/docs/support/troubleshoot.mdx)



--- a/docs/docs/reference/troubleshooting.mdx
+++ b/docs/docs/reference/troubleshooting.mdx
@ -1,24 +0,0 @@
---
-title: Troubleshooting
-slug: /reference/troubleshooting
---
-
-# Troubleshooting
-
-## Podman on macOS
-
-If using Podman on macOS, you may need to increase VM memory:
-
-```bash
-podman machine stop
-podman machine rm
-podman machine init --memory 8192   # 8 GB example
-podman machine start
-```
-
-## Common Issues
-
-1. OpenSearch fails to start: Check that `OPENSEARCH_PASSWORD` is set and meets requirements
-2. Langflow connection issues: Verify `LANGFLOW_SUPERUSER` credentials are correct
-3. Out of memory errors: Increase Docker memory allocation or use CPU-only mode
-4. Port conflicts: Ensure ports 3000, 7860, 8000, 9200, 5601 are available
--- a/docs/docs/support/troubleshoot.mdx
+++ b/docs/docs/support/troubleshoot.mdx
@ -0,0 +1,107 @@
+---
+title: Troubleshoot
+slug: /support/troubleshoot
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.
+
+## OpenSearch fails to start
+
+Check that `OPENSEARCH_PASSWORD` is set and meets requirements.
+The password must be strong: it must contain at least 8 characters, including at least one uppercase letter, one lowercase letter, one digit, and one special character.
+
+## Langflow connection issues
+
+Verify the `LANGFLOW_SUPERUSER` credentials are correct.
+
+## Memory errors
+
+### Container out of memory errors
+
+Increase Docker memory allocation or use [docker-compose-cpu.yml](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) to deploy OpenRAG.
+
+### Podman on macOS memory issues
+
+If you're using Podman on macOS, you may need to increase VM memory on your Podman machine.
+This example increases the machine size to 8 GB of RAM, which should be sufficient to run OpenRAG.
+   ```bash
+   podman machine stop
+   podman machine rm
+   podman machine init --memory 8192   # 8 GB example
+   podman machine start
+   ```
+   
+## Port conflicts
+
+Ensure that ports 3000, 7860, 8000, 9200, and 5601 are available.
+
+## Langflow container already exists
+
+If you are running other versions of Langflow containers on your machine, you may encounter an issue where Docker or Podman reports that Langflow is already running.
+
+Remove just the problem container, or clean up all containers and start fresh. 
+
+To reset your local containers and pull new images, do the following:
+
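+1. Stop your containers and completely remove them. Note that these commands affect all containers, images, and volumes on your machine, not only those used by OpenRAG.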
+
+<Tabs groupId="Container software">
+  <TabItem value="Docker" label="Docker" default>
+
+    ```bash
+    # Stop all running containers
+    docker stop $(docker ps -q)
+    
+    # Remove all containers (including stopped ones)
+    docker rm --force $(docker ps -aq)
+    
+    # Remove all images
+    docker rmi --force $(docker images -q)
+    
+    # Remove all volumes
+    docker volume prune --force
+    
+    # Remove all networks (except default)
+    docker network prune --force
+    
+    # Clean up any leftover data
+    docker system prune --all --force --volumes
+    ```
+
+   </TabItem>
+   <TabItem value="Podman" label="Podman">
+
+    ```bash
+    # Stop all running containers
+    podman stop --all
+    
+    # Remove all containers (including stopped ones)
+    podman rm --all --force
+    
+    # Remove all images
+    podman rmi --all --force
+    
+    # Remove all volumes
+    podman volume prune --force
+    
+    # Remove all networks (except default)
+    podman network prune --force
+    
+    # Clean up any leftover data
+    podman system prune --all --force --volumes
+    ```
+
+  </TabItem>
+</Tabs>
+
+2. Restart OpenRAG and upgrade to get the latest images for your containers.
+   ```bash
+   uv run openrag
+   ```
+
+3. In the OpenRAG TUI, click **Status**, and then click **Upgrade**.
+When the **Close** button is active, the upgrade is complete.
+Close the window and open the OpenRAG application.
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@ -76,12 +76,12 @@ const sidebars = {
    },
    {
      type: "category",
-      label: "Reference",
+      label: "Support",
      items: [
        {
          type: "doc",
-          id: "reference/troubleshooting",
-          label: "Troubleshooting"
+          id: "support/troubleshoot",
+          label: "Troubleshoot"
        },
      ],
    },
--- a/frontend/components/ui/switch.tsx
+++ b/frontend/components/ui/switch.tsx
@ -11,7 +11,7 @@ const Switch = React.forwardRef<
 >(({ className, ...props }, ref) => (
  <SwitchPrimitives.Root
    className={cn(
-      "peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
+      "peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-muted",
      className
    )}
    {...props}
@ -19,7 +19,7 @@ const Switch = React.forwardRef<
  >
    <SwitchPrimitives.Thumb
      className={cn(
-        "pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
+        "pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0 data-[state=unchecked]:bg-primary"
      )}
    />
  </SwitchPrimitives.Root>
--- a/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts
+++ b/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts
@ -8,7 +8,9 @@ interface UpdateFlowSettingVariables {
  llm_model?: string;
  system_prompt?: string;
  embedding_model?: string;
-  doclingPresets?: string;
+  table_structure?: boolean;
+  ocr?: boolean;
+  picture_descriptions?: boolean;
  chunk_size?: number;
  chunk_overlap?: number;
 }
--- a/frontend/src/app/api/queries/useGetSettingsQuery.ts
+++ b/frontend/src/app/api/queries/useGetSettingsQuery.ts
@ -13,7 +13,9 @@ export interface KnowledgeSettings {
  embedding_model?: string;
  chunk_size?: number;
  chunk_overlap?: number;
-  doclingPresets?: string;
+  table_structure?: boolean;
+  ocr?: boolean;
+  picture_descriptions?: boolean;
 }

 export interface Settings {
--- a/frontend/src/app/settings/page.tsx
+++ b/frontend/src/app/settings/page.tsx
@ -22,9 +22,9 @@ import {
  CardTitle,
 } from "@/components/ui/card";
 import { Checkbox } from "@/components/ui/checkbox";
+import { Switch } from "@/components/ui/switch";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
-import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
 import {
  Select,
  SelectContent,
@ -39,11 +39,6 @@ import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from
 import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
 import { ModelSelectItems } from "./helpers/model-select-item";
 import { LabelWrapper } from "@/components/label-wrapper";
-import {
-  Tooltip,
-  TooltipContent,
-  TooltipTrigger,
-} from "@radix-ui/react-tooltip";

 const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS;

@ -112,7 +107,9 @@ function KnowledgeSourcesPage() {
  const [systemPrompt, setSystemPrompt] = useState<string>("");
  const [chunkSize, setChunkSize] = useState<number>(1024);
  const [chunkOverlap, setChunkOverlap] = useState<number>(50);
-  const [processingMode, setProcessingMode] = useState<string>("standard");
+  const [tableStructure, setTableStructure] = useState<boolean>(false);
+  const [ocr, setOcr] = useState<boolean>(false);
+  const [pictureDescriptions, setPictureDescriptions] = useState<boolean>(false);

  // Fetch settings using React Query
  const { data: settings = {} } = useGetSettingsQuery({
@ -195,12 +192,24 @@ function KnowledgeSourcesPage() {
    }
  }, [settings.knowledge?.chunk_overlap]);

-  // Sync processing mode with settings data
+  // Sync docling settings with settings data
  useEffect(() => {
-    if (settings.knowledge?.doclingPresets) {
-      setProcessingMode(settings.knowledge.doclingPresets);
+    if (settings.knowledge?.table_structure !== undefined) {
+      setTableStructure(settings.knowledge.table_structure);
    }
-  }, [settings.knowledge?.doclingPresets]);
+  }, [settings.knowledge?.table_structure]);
+
+  useEffect(() => {
+    if (settings.knowledge?.ocr !== undefined) {
+      setOcr(settings.knowledge.ocr);
+    }
+  }, [settings.knowledge?.ocr]);
+
+  useEffect(() => {
+    if (settings.knowledge?.picture_descriptions !== undefined) {
+      setPictureDescriptions(settings.knowledge.picture_descriptions);
+    }
+  }, [settings.knowledge?.picture_descriptions]);

  // Update model selection immediately
  const handleModelChange = (newModel: string) => {
@ -231,11 +240,20 @@ function KnowledgeSourcesPage() {
    debouncedUpdate({ chunk_overlap: numValue });
  };

-  // Update processing mode
-  const handleProcessingModeChange = (mode: string) => {
-    setProcessingMode(mode);
-    // Update the configuration setting (backend will also update the flow automatically)
-    debouncedUpdate({ doclingPresets: mode });
+  // Update docling settings
+  const handleTableStructureChange = (checked: boolean) => {
+    setTableStructure(checked);
+    updateFlowSettingMutation.mutate({ table_structure: checked });
+  };
+
+  const handleOcrChange = (checked: boolean) => {
+    setOcr(checked);
+    updateFlowSettingMutation.mutate({ ocr: checked });
+  };
+
+  const handlePictureDescriptionsChange = (checked: boolean) => {
+    setPictureDescriptions(checked);
+    updateFlowSettingMutation.mutate({ picture_descriptions: checked });
  };

  // Helper function to get connector icon
@ -569,7 +587,9 @@ function KnowledgeSourcesPage() {
        // Only reset form values if the API call was successful
        setChunkSize(DEFAULT_KNOWLEDGE_SETTINGS.chunk_size);
        setChunkOverlap(DEFAULT_KNOWLEDGE_SETTINGS.chunk_overlap);
-        setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode);
+        setTableStructure(false);
+        setOcr(false);
+        setPictureDescriptions(false);
        closeDialog(); // Close after successful completion
      })
      .catch((error) => {
@ -1063,76 +1083,61 @@ function KnowledgeSourcesPage() {
                </div>
              </div>
            </div>
-            <div className="space-y-3">
-              <Label className="text-base font-medium">Ingestion presets</Label>
-              <RadioGroup
-                value={processingMode}
-                onValueChange={handleProcessingModeChange}
-                className="space-y-3"
-              >
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem value="standard" id="standard" />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="standard"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      No OCR
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Fast ingest for documents with selectable text. Images are
-                      ignored.
-                    </div>
+            <div className="">
+              <div className="flex items-center justify-between py-3 border-b border-border">
+                <div className="flex-1">
+                  <Label
+                    htmlFor="table-structure"
+                    className="text-base font-medium cursor-pointer pb-3"
+                  >
+                    Table Structure
+                  </Label>
+                  <div className="text-sm text-muted-foreground">
+                    Capture table structure during ingest.
                  </div>
                </div>
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem value="ocr" id="ocr" />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="ocr"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      OCR
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Extracts text from images and scanned pages.
-                    </div>
+                <Switch
+                  id="table-structure"
+                  checked={tableStructure}
+                  onCheckedChange={handleTableStructureChange}
+                />
+              </div>
+              <div className="flex items-center justify-between py-3 border-b border-border">
+                <div className="flex-1">
+                  <Label
+                    htmlFor="ocr"
+                    className="text-base font-medium cursor-pointer pb-3"
+                  >
+                    OCR
+                  </Label>
+                  <div className="text-sm text-muted-foreground">
+                    Extracts text from images/PDFs. Ingest is slower when enabled.
                  </div>
                </div>
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem
-                    value="picture_description"
-                    id="picture_description"
-                  />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="picture_description"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      OCR + Captions
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Extracts text from images and scanned pages. Generates
-                      short image captions.
-                    </div>
+                <Switch
+                  id="ocr"
+                  checked={ocr}
+                  onCheckedChange={handleOcrChange}
+                />
+              </div>
+              <div className="flex items-center justify-between py-3">
+                <div className="flex-1">
+                  <Label
+                    htmlFor="picture-descriptions"
+                    className="text-base font-medium cursor-pointer pb-3"
+                  >
+                    Picture Descriptions
+                  </Label>
+                  <div className="text-sm text-muted-foreground">
+                    Adds captions for images. Ingest is slower when enabled.
                  </div>
                </div>
-                <div className="flex items-center space-x-3">
-                  <RadioGroupItem value="VLM" id="VLM" />
-                  <div className="flex-1">
-                    <Label
-                      htmlFor="VLM"
-                      className="text-base font-medium cursor-pointer"
-                    >
-                      VLM
-                    </Label>
-                    <div className="text-sm text-muted-foreground">
-                      Extracts text from layout-aware parsing of text, tables,
-                      and sections.
-                    </div>
-                  </div>
-                </div>
-              </RadioGroup>
+                <Switch
+                  id="picture-descriptions"
+                  checked={pictureDescriptions}
+                  onCheckedChange={handlePictureDescriptionsChange}
+                />
+              </div>
            </div>
          </div>
        </CardContent>
--- a/frontend/src/components/ui/switch.tsx
+++ b/frontend/src/components/ui/switch.tsx
@ -1,29 +0,0 @@
-"use client"
-
-import * as React from "react"
-import * as SwitchPrimitives from "@radix-ui/react-switch"
-
-import { cn } from "@/lib/utils"
-
-const Switch = React.forwardRef<
-  React.ElementRef<typeof SwitchPrimitives.Root>,
-  React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
->(({ className, ...props }, ref) => (
-  <SwitchPrimitives.Root
-    className={cn(
-      "peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
-      className
-    )}
-    {...props}
-    ref={ref}
-  >
-    <SwitchPrimitives.Thumb
-      className={cn(
-        "pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
-      )}
-    />
-  </SwitchPrimitives.Root>
-))
-Switch.displayName = SwitchPrimitives.Root.displayName
-
-export { Switch }
--- a/frontend/src/lib/constants.ts
+++ b/frontend/src/lib/constants.ts
@ -12,7 +12,9 @@ export const DEFAULT_AGENT_SETTINGS = {
 export const DEFAULT_KNOWLEDGE_SETTINGS = {
  chunk_size: 1000,
  chunk_overlap: 200,
-  processing_mode: "standard"
+  table_structure: false,
+  ocr: false,
+  picture_descriptions: false
 } as const;

 /**
--- a/src/api/settings.py
+++ b/src/api/settings.py
@ -17,35 +17,30 @@ logger = get_logger(__name__)


 # Docling preset configurations
-def get_docling_preset_configs():
-    """Get docling preset configurations with platform-specific settings"""
+def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False):
+    """Get docling preset configurations based on toggle settings
+
+    Args:
+        table_structure: Enable table structure parsing (default: False)
+        ocr: Enable OCR for text extraction from images (default: False)
+        picture_descriptions: Enable picture descriptions/captions (default: False)
+    """
    is_macos = platform.system() == "Darwin"

-    return {
-        "standard": {"do_ocr": False},
-        "ocr": {"do_ocr": True, "ocr_engine": "ocrmac" if is_macos else "easyocr"},
-        "picture_description": {
-            "do_ocr": True,
-            "ocr_engine": "ocrmac" if is_macos else "easyocr",
-            "do_picture_classification": True,
-            "do_picture_description": True,
-            "picture_description_local": {
-                "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
-                "prompt": "Describe this image in a few sentences.",
-            },
-        },
-        "VLM": {
-            "pipeline": "vlm",
-            "vlm_pipeline_model_local": {
-                "repo_id": "ds4sd/SmolDocling-256M-preview-mlx-bf16"
-                if is_macos
-                else "ds4sd/SmolDocling-256M-preview",
-                "response_format": "doctags",
-                "inference_framework": "mlx",
-            },
-        },
+    config = {
+        "do_ocr": ocr,
+        "ocr_engine": "ocrmac" if is_macos else "easyocr",
+        "do_table_structure": table_structure,
+        "do_picture_classification": picture_descriptions,
+        "do_picture_description": picture_descriptions,
+        "picture_description_local": {
+            "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
+            "prompt": "Describe this image in a few sentences.",
+        }
    }

+    return config
+

 async def get_settings(request, session_manager):
    """Get application settings"""
@ -71,7 +66,9 @@ async def get_settings(request, session_manager):
                "embedding_model": knowledge_config.embedding_model,
                "chunk_size": knowledge_config.chunk_size,
                "chunk_overlap": knowledge_config.chunk_overlap,
-                "doclingPresets": knowledge_config.doclingPresets,
+                "table_structure": knowledge_config.table_structure,
+                "ocr": knowledge_config.ocr,
+                "picture_descriptions": knowledge_config.picture_descriptions,
            },
            "agent": {
                "llm_model": agent_config.llm_model,
@ -178,7 +175,9 @@ async def update_settings(request, session_manager):
            "system_prompt",
            "chunk_size",
            "chunk_overlap",
-            "doclingPresets",
+            "table_structure",
+            "ocr",
+            "picture_descriptions",
            "embedding_model",
        }

@ -255,32 +254,68 @@ async def update_settings(request, session_manager):
                # Don't fail the entire settings update if flow update fails
                # The config will still be saved

-        if "doclingPresets" in body:
-            preset_configs = get_docling_preset_configs()
-            valid_presets = list(preset_configs.keys())
-            if body["doclingPresets"] not in valid_presets:
+        if "table_structure" in body:
+            if not isinstance(body["table_structure"], bool):
                return JSONResponse(
-                    {
-                        "error": f"doclingPresets must be one of: {', '.join(valid_presets)}"
-                    },
-                    status_code=400,
+                    {"error": "table_structure must be a boolean"}, status_code=400
                )
-            current_config.knowledge.doclingPresets = body["doclingPresets"]
+            current_config.knowledge.table_structure = body["table_structure"]
            config_updated = True

-            # Also update the flow with the new docling preset
+            # Also update the flow with the new docling settings
            try:
                flows_service = _get_flows_service()
-                await flows_service.update_flow_docling_preset(
-                    body["doclingPresets"], preset_configs[body["doclingPresets"]]
-                )
-                logger.info(
-                    f"Successfully updated docling preset in flow to '{body['doclingPresets']}'"
+                preset_config = get_docling_preset_configs(
+                    table_structure=body["table_structure"],
+                    ocr=current_config.knowledge.ocr,
+                    picture_descriptions=current_config.knowledge.picture_descriptions
                )
+                await flows_service.update_flow_docling_preset("custom", preset_config)
+                logger.info(f"Successfully updated table_structure setting in flow")
            except Exception as e:
-                logger.error(f"Failed to update docling preset in flow: {str(e)}")
-                # Don't fail the entire settings update if flow update fails
-                # The config will still be saved
+                logger.error(f"Failed to update docling settings in flow: {str(e)}")
+
+        if "ocr" in body:
+            if not isinstance(body["ocr"], bool):
+                return JSONResponse(
+                    {"error": "ocr must be a boolean"}, status_code=400
+                )
+            current_config.knowledge.ocr = body["ocr"]
+            config_updated = True
+
+            # Also update the flow with the new docling settings
+            try:
+                flows_service = _get_flows_service()
+                preset_config = get_docling_preset_configs(
+                    table_structure=current_config.knowledge.table_structure,
+                    ocr=body["ocr"],
+                    picture_descriptions=current_config.knowledge.picture_descriptions
+                )
+                await flows_service.update_flow_docling_preset("custom", preset_config)
+                logger.info(f"Successfully updated ocr setting in flow")
+            except Exception as e:
+                logger.error(f"Failed to update docling settings in flow: {str(e)}")
+
+        if "picture_descriptions" in body:
+            if not isinstance(body["picture_descriptions"], bool):
+                return JSONResponse(
+                    {"error": "picture_descriptions must be a boolean"}, status_code=400
+                )
+            current_config.knowledge.picture_descriptions = body["picture_descriptions"]
+            config_updated = True
+
+            # Also update the flow with the new docling settings
+            try:
+                flows_service = _get_flows_service()
+                preset_config = get_docling_preset_configs(
+                    table_structure=current_config.knowledge.table_structure,
+                    ocr=current_config.knowledge.ocr,
+                    picture_descriptions=body["picture_descriptions"]
+                )
+                await flows_service.update_flow_docling_preset("custom", preset_config)
+                logger.info(f"Successfully updated picture_descriptions setting in flow")
+            except Exception as e:
+                logger.error(f"Failed to update docling settings in flow: {str(e)}")

        if "chunk_size" in body:
            if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
@ -624,48 +659,56 @@ def _get_flows_service():


 async def update_docling_preset(request, session_manager):
-    """Update docling preset in the ingest flow"""
+    """Update docling settings in the ingest flow - deprecated endpoint, use /settings instead"""
    try:
        # Parse request body
        body = await request.json()

-        # Validate preset parameter
-        if "preset" not in body:
-            return JSONResponse(
-                {"error": "preset parameter is required"}, status_code=400
-            )
+        # Support old preset-based API for backwards compatibility
+        if "preset" in body:
+            # Map old presets to new toggle settings
+            preset_map = {
+                "standard": {"table_structure": False, "ocr": False, "picture_descriptions": False},
+                "ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False},
+                "picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True},
+                "VLM": {"table_structure": False, "ocr": False, "picture_descriptions": False},
+            }

-        preset = body["preset"]
-        preset_configs = get_docling_preset_configs()
+            preset = body["preset"]
+            if preset not in preset_map:
+                return JSONResponse(
+                    {"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_map.keys())}"},
+                    status_code=400,
+                )

-        if preset not in preset_configs:
-            valid_presets = list(preset_configs.keys())
-            return JSONResponse(
-                {
-                    "error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"
-                },
-                status_code=400,
-            )
+            settings = preset_map[preset]
+        else:
+            # Support new toggle-based API
+            settings = {
+                "table_structure": body.get("table_structure", False),
+                "ocr": body.get("ocr", False),
+                "picture_descriptions": body.get("picture_descriptions", False),
+            }

        # Get the preset configuration
-        preset_config = preset_configs[preset]
+        preset_config = get_docling_preset_configs(**settings)

        # Use the helper function to update the flow
        flows_service = _get_flows_service()
-        await flows_service.update_flow_docling_preset(preset, preset_config)
+        await flows_service.update_flow_docling_preset("custom", preset_config)

-        logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
+        logger.info(f"Successfully updated docling settings in ingest flow")

        return JSONResponse(
            {
-                "message": f"Successfully updated docling preset to '{preset}'",
-                "preset": preset,
+                "message": f"Successfully updated docling settings",
+                "settings": settings,
                "preset_config": preset_config,
            }
        )

    except Exception as e:
-        logger.error("Failed to update docling preset", error=str(e))
+        logger.error("Failed to update docling settings", error=str(e))
        return JSONResponse(
-            {"error": f"Failed to update docling preset: {str(e)}"}, status_code=500
+            {"error": f"Failed to update docling settings: {str(e)}"}, status_code=500
        )
--- a/src/config/config_manager.py
+++ b/src/config/config_manager.py
@ -27,7 +27,9 @@ class KnowledgeConfig:
    embedding_model: str = "text-embedding-3-small"
    chunk_size: int = 1000
    chunk_overlap: int = 200
-    doclingPresets: str = "standard"
+    table_structure: bool = False
+    ocr: bool = False
+    picture_descriptions: bool = False


@dataclass