-
-
-
-
- Extracts text from images and scanned pages.
-
+
+
+
+
+
+
+ Extracts text from images/PDFs. Ingest is slower when enabled.
-
-
-
-
-
- Extracts text from images and scanned pages. Generates
- short image captions.
-
+
+
+
+
+
+
+ Adds captions for images. Ingest is slower when enabled.
-
-
-
-
-
- Extracts text from layout-aware parsing of text, tables,
- and sections.
-
-
-
-
+
+
diff --git a/frontend/src/lib/constants.ts b/frontend/src/lib/constants.ts
index 9c6ea7b0..8e7770fb 100644
--- a/frontend/src/lib/constants.ts
+++ b/frontend/src/lib/constants.ts
@@ -12,7 +12,9 @@ export const DEFAULT_AGENT_SETTINGS = {
export const DEFAULT_KNOWLEDGE_SETTINGS = {
chunk_size: 1000,
chunk_overlap: 200,
- processing_mode: "standard"
+ table_structure: false,
+ ocr: false,
+ picture_descriptions: false
} as const;
/**
diff --git a/src/api/settings.py b/src/api/settings.py
index c2c7cbd0..a99cce61 100644
--- a/src/api/settings.py
+++ b/src/api/settings.py
@@ -17,35 +17,30 @@ logger = get_logger(__name__)
# Docling preset configurations
-def get_docling_preset_configs():
- """Get docling preset configurations with platform-specific settings"""
+def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False):
+ """Get docling preset configurations based on toggle settings
+
+ Args:
+ table_structure: Enable table structure parsing (default: False)
+ ocr: Enable OCR for text extraction from images (default: False)
+ picture_descriptions: Enable picture descriptions/captions (default: False)
+ """
is_macos = platform.system() == "Darwin"
- return {
- "standard": {"do_ocr": False},
- "ocr": {"do_ocr": True, "ocr_engine": "ocrmac" if is_macos else "easyocr"},
- "picture_description": {
- "do_ocr": True,
- "ocr_engine": "ocrmac" if is_macos else "easyocr",
- "do_picture_classification": True,
- "do_picture_description": True,
- "picture_description_local": {
- "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
- "prompt": "Describe this image in a few sentences.",
- },
- },
- "VLM": {
- "pipeline": "vlm",
- "vlm_pipeline_model_local": {
- "repo_id": "ds4sd/SmolDocling-256M-preview-mlx-bf16"
- if is_macos
- else "ds4sd/SmolDocling-256M-preview",
- "response_format": "doctags",
- "inference_framework": "mlx",
- },
- },
+ config = {
+ "do_ocr": ocr,
+ "ocr_engine": "ocrmac" if is_macos else "easyocr",
+ "do_table_structure": table_structure,
+ "do_picture_classification": picture_descriptions,
+ "do_picture_description": picture_descriptions,
+ "picture_description_local": {
+ "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
+ "prompt": "Describe this image in a few sentences.",
+ }
}
+ return config
+
async def get_settings(request, session_manager):
"""Get application settings"""
@@ -71,7 +66,9 @@ async def get_settings(request, session_manager):
"embedding_model": knowledge_config.embedding_model,
"chunk_size": knowledge_config.chunk_size,
"chunk_overlap": knowledge_config.chunk_overlap,
- "doclingPresets": knowledge_config.doclingPresets,
+ "table_structure": knowledge_config.table_structure,
+ "ocr": knowledge_config.ocr,
+ "picture_descriptions": knowledge_config.picture_descriptions,
},
"agent": {
"llm_model": agent_config.llm_model,
@@ -178,7 +175,9 @@ async def update_settings(request, session_manager):
"system_prompt",
"chunk_size",
"chunk_overlap",
- "doclingPresets",
+ "table_structure",
+ "ocr",
+ "picture_descriptions",
"embedding_model",
}
@@ -255,32 +254,68 @@ async def update_settings(request, session_manager):
# Don't fail the entire settings update if flow update fails
# The config will still be saved
- if "doclingPresets" in body:
- preset_configs = get_docling_preset_configs()
- valid_presets = list(preset_configs.keys())
- if body["doclingPresets"] not in valid_presets:
+ if "table_structure" in body:
+ if not isinstance(body["table_structure"], bool):
return JSONResponse(
- {
- "error": f"doclingPresets must be one of: {', '.join(valid_presets)}"
- },
- status_code=400,
+ {"error": "table_structure must be a boolean"}, status_code=400
)
- current_config.knowledge.doclingPresets = body["doclingPresets"]
+ current_config.knowledge.table_structure = body["table_structure"]
config_updated = True
- # Also update the flow with the new docling preset
+ # Also update the flow with the new docling settings
try:
flows_service = _get_flows_service()
- await flows_service.update_flow_docling_preset(
- body["doclingPresets"], preset_configs[body["doclingPresets"]]
- )
- logger.info(
- f"Successfully updated docling preset in flow to '{body['doclingPresets']}'"
+ preset_config = get_docling_preset_configs(
+ table_structure=body["table_structure"],
+ ocr=current_config.knowledge.ocr,
+ picture_descriptions=current_config.knowledge.picture_descriptions
)
+ await flows_service.update_flow_docling_preset("custom", preset_config)
+ logger.info(f"Successfully updated table_structure setting in flow")
except Exception as e:
- logger.error(f"Failed to update docling preset in flow: {str(e)}")
- # Don't fail the entire settings update if flow update fails
- # The config will still be saved
+ logger.error(f"Failed to update docling settings in flow: {str(e)}")
+
+ if "ocr" in body:
+ if not isinstance(body["ocr"], bool):
+ return JSONResponse(
+ {"error": "ocr must be a boolean"}, status_code=400
+ )
+ current_config.knowledge.ocr = body["ocr"]
+ config_updated = True
+
+ # Also update the flow with the new docling settings
+ try:
+ flows_service = _get_flows_service()
+ preset_config = get_docling_preset_configs(
+ table_structure=current_config.knowledge.table_structure,
+ ocr=body["ocr"],
+ picture_descriptions=current_config.knowledge.picture_descriptions
+ )
+ await flows_service.update_flow_docling_preset("custom", preset_config)
+ logger.info(f"Successfully updated ocr setting in flow")
+ except Exception as e:
+ logger.error(f"Failed to update docling settings in flow: {str(e)}")
+
+ if "picture_descriptions" in body:
+ if not isinstance(body["picture_descriptions"], bool):
+ return JSONResponse(
+ {"error": "picture_descriptions must be a boolean"}, status_code=400
+ )
+ current_config.knowledge.picture_descriptions = body["picture_descriptions"]
+ config_updated = True
+
+ # Also update the flow with the new docling settings
+ try:
+ flows_service = _get_flows_service()
+ preset_config = get_docling_preset_configs(
+ table_structure=current_config.knowledge.table_structure,
+ ocr=current_config.knowledge.ocr,
+ picture_descriptions=body["picture_descriptions"]
+ )
+ await flows_service.update_flow_docling_preset("custom", preset_config)
+ logger.info(f"Successfully updated picture_descriptions setting in flow")
+ except Exception as e:
+ logger.error(f"Failed to update docling settings in flow: {str(e)}")
if "chunk_size" in body:
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
@@ -624,48 +659,56 @@ def _get_flows_service():
async def update_docling_preset(request, session_manager):
- """Update docling preset in the ingest flow"""
+ """Update docling settings in the ingest flow - deprecated endpoint, use /settings instead"""
try:
# Parse request body
body = await request.json()
- # Validate preset parameter
- if "preset" not in body:
- return JSONResponse(
- {"error": "preset parameter is required"}, status_code=400
- )
+ # Support old preset-based API for backwards compatibility
+ if "preset" in body:
+ # Map old presets to new toggle settings
+ preset_map = {
+ "standard": {"table_structure": False, "ocr": False, "picture_descriptions": False},
+ "ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False},
+ "picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True},
+ "VLM": {"table_structure": False, "ocr": False, "picture_descriptions": False},
+ }
- preset = body["preset"]
- preset_configs = get_docling_preset_configs()
+ preset = body["preset"]
+ if preset not in preset_map:
+ return JSONResponse(
+ {"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_map.keys())}"},
+ status_code=400,
+ )
- if preset not in preset_configs:
- valid_presets = list(preset_configs.keys())
- return JSONResponse(
- {
- "error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"
- },
- status_code=400,
- )
+ settings = preset_map[preset]
+ else:
+ # Support new toggle-based API
+ settings = {
+ "table_structure": body.get("table_structure", False),
+ "ocr": body.get("ocr", False),
+ "picture_descriptions": body.get("picture_descriptions", False),
+ }
# Get the preset configuration
- preset_config = preset_configs[preset]
+ preset_config = get_docling_preset_configs(**settings)
# Use the helper function to update the flow
flows_service = _get_flows_service()
- await flows_service.update_flow_docling_preset(preset, preset_config)
+ await flows_service.update_flow_docling_preset("custom", preset_config)
- logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
+ logger.info(f"Successfully updated docling settings in ingest flow")
return JSONResponse(
{
- "message": f"Successfully updated docling preset to '{preset}'",
- "preset": preset,
+ "message": f"Successfully updated docling settings",
+ "settings": settings,
"preset_config": preset_config,
}
)
except Exception as e:
- logger.error("Failed to update docling preset", error=str(e))
+ logger.error("Failed to update docling settings", error=str(e))
return JSONResponse(
- {"error": f"Failed to update docling preset: {str(e)}"}, status_code=500
+ {"error": f"Failed to update docling settings: {str(e)}"}, status_code=500
)
diff --git a/src/config/config_manager.py b/src/config/config_manager.py
index 0b814470..6e891c5c 100644
--- a/src/config/config_manager.py
+++ b/src/config/config_manager.py
@@ -27,7 +27,9 @@ class KnowledgeConfig:
embedding_model: str = "text-embedding-3-small"
chunk_size: int = 1000
chunk_overlap: int = 200
- doclingPresets: str = "standard"
+ table_structure: bool = False
+ ocr: bool = False
+ picture_descriptions: bool = False
@dataclass
From f6bb375860fc821af56ac278939e5d34e5c89300 Mon Sep 17 00:00:00 2001
From: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Date: Tue, 30 Sep 2025 09:51:42 -0400
Subject: [PATCH 04/23] init
---
docs/docs/core-components/ingestion.mdx | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 docs/docs/core-components/ingestion.mdx
diff --git a/docs/docs/core-components/ingestion.mdx b/docs/docs/core-components/ingestion.mdx
new file mode 100644
index 00000000..d240d53e
--- /dev/null
+++ b/docs/docs/core-components/ingestion.mdx
@@ -0,0 +1,23 @@
+---
+title: Docling Ingestion
+slug: /ingestion
+---
+
+import Icon from "@site/src/components/icon/icon";
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import PartialModifyFlows from '@site/docs/_partial-modify-flows.mdx';
+
+OpenRAG uses [Docling](https://docling-project.github.io/docling/) for its document ingestion pipeline.
+More specifically, OpenRAG uses [Docling Serve](https://github.com/docling-project/docling-serve), which starts a `docling-serve` process on your local machine and runs Docling ingestion through an API service.
+
+OpenRAG chose Docling because it supports a wide variety of file formats, offers high performance, and has an advanced understanding of tables and images.
+
+## Docling ingestion settings
+
+These settings control the Docling ingestion parameters.
+
+OpenRAG warns you if `docling-serve` is not running.
+To start or stop `docling-serve` or any other native services, go to the TUI main menu and click **Start Native Services** or **Stop Native Services**.
+
+## Use OpenRAG default ingestion instead of Docling
\ No newline at end of file
From 7d8bc4be211ed708226521c3da72a1c000cebb3a Mon Sep 17 00:00:00 2001
From: Mendon Kissling <59585235+mendonk@users.noreply.github.com>
Date: Tue, 30 Sep 2025 10:57:05 -0400
Subject: [PATCH 05/23] slug
---
docs/docs/support/troubleshoot.mdx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/docs/support/troubleshoot.mdx b/docs/docs/support/troubleshoot.mdx
index bf18e2f7..57dcb4d3 100644
--- a/docs/docs/support/troubleshoot.mdx
+++ b/docs/docs/support/troubleshoot.mdx
@@ -1,6 +1,6 @@
---
title: Troubleshoot
-slug: /reference/troubleshoot
+slug: /support/troubleshoot
---
import Tabs from '@theme/Tabs';
From bf871b9cd728518aaac51c77aa511de698cde890 Mon Sep 17 00:00:00 2001
From: phact
Date: Tue, 30 Sep 2025 11:46:17 -0400
Subject: [PATCH 06/23] use Dockerfile.langflow
---
.github/workflows/build-langflow-responses.yml | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/build-langflow-responses.yml b/.github/workflows/build-langflow-responses.yml
index 8d9264e2..0f9d3d08 100644
--- a/.github/workflows/build-langflow-responses.yml
+++ b/.github/workflows/build-langflow-responses.yml
@@ -19,11 +19,8 @@ jobs:
runs-on: ${{ matrix.runs-on }}
steps:
- - name: Checkout langflow load_flows_autologin_false branch
+ - name: Checkout
uses: actions/checkout@v4
- with:
- repository: langflow-ai/langflow
- ref: load_flows_autologin_false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -38,6 +35,7 @@ jobs:
uses: docker/build-push-action@v5
with:
context: .
+ file: ./Dockerfile.langflow
platforms: ${{ matrix.platform }}
push: true
tags: phact/langflow:responses-${{ matrix.arch }}
From 70a75ee63ddafff47ef5681fe140af01766125a4 Mon Sep 17 00:00:00 2001
From: Cole Goldsmith
Date: Tue, 30 Sep 2025 11:22:37 -0500
Subject: [PATCH 07/23] fix accent color variables
---
frontend/components/filter-icon-popover.tsx | 179 +++++++++---------
frontend/components/knowledge-filter-list.tsx | 128 +++----------
.../components/knowledge-filter-panel.tsx | 12 +-
.../src/app/api/queries/useGetSearchQuery.ts | 2 +-
frontend/src/app/globals.css | 51 +++--
frontend/src/app/knowledge/page.tsx | 2 +-
frontend/src/components/ui/status-badge.tsx | 12 +-
7 files changed, 154 insertions(+), 232 deletions(-)
diff --git a/frontend/components/filter-icon-popover.tsx b/frontend/components/filter-icon-popover.tsx
index 4bc181e7..84cd4a6b 100644
--- a/frontend/components/filter-icon-popover.tsx
+++ b/frontend/components/filter-icon-popover.tsx
@@ -1,78 +1,78 @@
"use client";
import React, { type SVGProps } from "react";
-import { Button } from "@/components/ui/button";
import {
Popover,
PopoverContent,
PopoverTrigger,
} from "@/components/ui/popover";
import {
- Filter as FilterIcon,
- Star,
+ File,
Book,
- FileText,
- Folder,
- Globe,
- Calendar,
- User,
- Users,
- Tag,
- Briefcase,
- Building2,
- Cog,
+ Scroll,
+ Library,
+ Map,
+ FileImage,
+ Layers3,
Database,
- Cpu,
- Bot,
- MessageSquare,
- Search,
+ Folder,
+ Archive,
+ MessagesSquare,
+ SquareStack,
+ Ghost,
+ Gem,
+ Swords,
+ Bolt,
Shield,
- Lock,
- Key,
- Link,
- Mail,
- Phone,
+ Hammer,
+ Globe,
+ HardDrive,
+ Upload,
+ Cable,
+ ShoppingCart,
+ ShoppingBag,
Check,
+ Plus,
} from "lucide-react";
import { filterAccentClasses } from "./knowledge-filter-panel";
+import { cn } from "@/lib/utils";
const ICON_MAP = {
- Filter: FilterIcon,
- Star,
- Book,
- FileText,
- Folder,
- Globe,
- Calendar,
- User,
- Users,
- Tag,
- Briefcase,
- Building2,
- Cog,
- Database,
- Cpu,
- Bot,
- MessageSquare,
- Search,
- Shield,
- Lock,
- Key,
- Link,
- Mail,
- Phone,
+ file: File,
+ book: Book,
+ scroll: Scroll,
+ library: Library,
+ map: Map,
+ image: FileImage,
+ layers3: Layers3,
+ database: Database,
+ folder: Folder,
+ archive: Archive,
+ messagesSquare: MessagesSquare,
+ squareStack: SquareStack,
+ ghost: Ghost,
+ gem: Gem,
+ swords: Swords,
+ bolt: Bolt,
+ shield: Shield,
+ hammer: Hammer,
+ globe: Globe,
+ hardDrive: HardDrive,
+ upload: Upload,
+ cable: Cable,
+ shoppingCart: ShoppingCart,
+ shoppingBag: ShoppingBag,
} as const;
export type IconKey = keyof typeof ICON_MAP;
-function iconKeyToComponent(
- key: string
-): React.ComponentType> {
+export function iconKeyToComponent(
+ key?: string
+): React.ComponentType> | undefined {
+ if (!key) return undefined;
return (
- (ICON_MAP as Record>>)[
- key
- ] || FilterIcon
- );
+ ICON_MAP as Record>>
+ )[key];
}
const COLORS = [
@@ -87,21 +87,21 @@ const COLORS = [
export type FilterColor = (typeof COLORS)[number];
const colorSwatchClasses = {
- zinc: "bg-muted-foreground",
- pink: "bg-accent-pink-foreground",
- purple: "bg-accent-purple-foreground",
- indigo: "bg-accent-indigo-foreground",
- emerald: "bg-accent-emerald-foreground",
- amber: "bg-accent-amber-foreground",
- red: "bg-accent-red-foreground",
- "": "bg-muted-foreground",
+ zinc: "bg-muted-foreground text-accent-foreground",
+ pink: "bg-accent-pink-foreground text-accent-pink",
+ purple: "bg-accent-purple-foreground text-accent-purple",
+ indigo: "bg-accent-indigo-foreground text-accent-indigo",
+ emerald: "bg-accent-emerald-foreground text-accent-emerald",
+ amber: "bg-accent-amber-foreground text-accent-amber",
+ red: "bg-accent-red-foreground text-accent-red",
+ "": "bg-muted-foreground text-accent-foreground",
};
export interface FilterIconPopoverProps {
color: FilterColor;
- iconKey: IconKey | string;
+ iconKey?: IconKey | undefined;
onColorChange: (c: FilterColor) => void;
- onIconChange: (k: IconKey) => void;
+ onIconChange: (k: IconKey | undefined) => void;
triggerClassName?: string;
}
@@ -116,56 +116,55 @@ export function FilterIconPopover({
return (
-
+ {Icon && }
+ {!Icon && }
+
-
+
{COLORS.map((c) => (
))}
-
- Icon
-
- {(Object.keys(ICON_MAP) as IconKey[]).map((k) => {
- const OptIcon = ICON_MAP[k];
+ {Object.keys(ICON_MAP).map((k: string) => {
+ const OptIcon = ICON_MAP[k as IconKey];
const active = iconKey === k;
return (