Merge pull request #141 from langflow-ai/docling-settings
Update docling preset options
This commit is contained in:
commit
f54479cf48
8 changed files with 211 additions and 184 deletions
|
|
@ -11,7 +11,7 @@ const Switch = React.forwardRef<
|
||||||
>(({ className, ...props }, ref) => (
|
>(({ className, ...props }, ref) => (
|
||||||
<SwitchPrimitives.Root
|
<SwitchPrimitives.Root
|
||||||
className={cn(
|
className={cn(
|
||||||
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
|
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-muted",
|
||||||
className
|
className
|
||||||
)}
|
)}
|
||||||
{...props}
|
{...props}
|
||||||
|
|
@ -19,7 +19,7 @@ const Switch = React.forwardRef<
|
||||||
>
|
>
|
||||||
<SwitchPrimitives.Thumb
|
<SwitchPrimitives.Thumb
|
||||||
className={cn(
|
className={cn(
|
||||||
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
|
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0 data-[state=unchecked]:bg-primary"
|
||||||
)}
|
)}
|
||||||
/>
|
/>
|
||||||
</SwitchPrimitives.Root>
|
</SwitchPrimitives.Root>
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,9 @@ interface UpdateFlowSettingVariables {
|
||||||
llm_model?: string;
|
llm_model?: string;
|
||||||
system_prompt?: string;
|
system_prompt?: string;
|
||||||
embedding_model?: string;
|
embedding_model?: string;
|
||||||
doclingPresets?: string;
|
table_structure?: boolean;
|
||||||
|
ocr?: boolean;
|
||||||
|
picture_descriptions?: boolean;
|
||||||
chunk_size?: number;
|
chunk_size?: number;
|
||||||
chunk_overlap?: number;
|
chunk_overlap?: number;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,9 @@ export interface KnowledgeSettings {
|
||||||
embedding_model?: string;
|
embedding_model?: string;
|
||||||
chunk_size?: number;
|
chunk_size?: number;
|
||||||
chunk_overlap?: number;
|
chunk_overlap?: number;
|
||||||
doclingPresets?: string;
|
table_structure?: boolean;
|
||||||
|
ocr?: boolean;
|
||||||
|
picture_descriptions?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Settings {
|
export interface Settings {
|
||||||
|
|
|
||||||
|
|
@ -22,9 +22,9 @@ import {
|
||||||
CardTitle,
|
CardTitle,
|
||||||
} from "@/components/ui/card";
|
} from "@/components/ui/card";
|
||||||
import { Checkbox } from "@/components/ui/checkbox";
|
import { Checkbox } from "@/components/ui/checkbox";
|
||||||
|
import { Switch } from "@/components/ui/switch";
|
||||||
import { Input } from "@/components/ui/input";
|
import { Input } from "@/components/ui/input";
|
||||||
import { Label } from "@/components/ui/label";
|
import { Label } from "@/components/ui/label";
|
||||||
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
|
|
||||||
import {
|
import {
|
||||||
Select,
|
Select,
|
||||||
SelectContent,
|
SelectContent,
|
||||||
|
|
@ -39,11 +39,6 @@ import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from
|
||||||
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
|
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
|
||||||
import { ModelSelectItems } from "./helpers/model-select-item";
|
import { ModelSelectItems } from "./helpers/model-select-item";
|
||||||
import { LabelWrapper } from "@/components/label-wrapper";
|
import { LabelWrapper } from "@/components/label-wrapper";
|
||||||
import {
|
|
||||||
Tooltip,
|
|
||||||
TooltipContent,
|
|
||||||
TooltipTrigger,
|
|
||||||
} from "@radix-ui/react-tooltip";
|
|
||||||
|
|
||||||
const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS;
|
const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS;
|
||||||
|
|
||||||
|
|
@ -112,7 +107,9 @@ function KnowledgeSourcesPage() {
|
||||||
const [systemPrompt, setSystemPrompt] = useState<string>("");
|
const [systemPrompt, setSystemPrompt] = useState<string>("");
|
||||||
const [chunkSize, setChunkSize] = useState<number>(1024);
|
const [chunkSize, setChunkSize] = useState<number>(1024);
|
||||||
const [chunkOverlap, setChunkOverlap] = useState<number>(50);
|
const [chunkOverlap, setChunkOverlap] = useState<number>(50);
|
||||||
const [processingMode, setProcessingMode] = useState<string>("standard");
|
const [tableStructure, setTableStructure] = useState<boolean>(false);
|
||||||
|
const [ocr, setOcr] = useState<boolean>(false);
|
||||||
|
const [pictureDescriptions, setPictureDescriptions] = useState<boolean>(false);
|
||||||
|
|
||||||
// Fetch settings using React Query
|
// Fetch settings using React Query
|
||||||
const { data: settings = {} } = useGetSettingsQuery({
|
const { data: settings = {} } = useGetSettingsQuery({
|
||||||
|
|
@ -195,12 +192,24 @@ function KnowledgeSourcesPage() {
|
||||||
}
|
}
|
||||||
}, [settings.knowledge?.chunk_overlap]);
|
}, [settings.knowledge?.chunk_overlap]);
|
||||||
|
|
||||||
// Sync processing mode with settings data
|
// Sync docling settings with settings data
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (settings.knowledge?.doclingPresets) {
|
if (settings.knowledge?.table_structure !== undefined) {
|
||||||
setProcessingMode(settings.knowledge.doclingPresets);
|
setTableStructure(settings.knowledge.table_structure);
|
||||||
}
|
}
|
||||||
}, [settings.knowledge?.doclingPresets]);
|
}, [settings.knowledge?.table_structure]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (settings.knowledge?.ocr !== undefined) {
|
||||||
|
setOcr(settings.knowledge.ocr);
|
||||||
|
}
|
||||||
|
}, [settings.knowledge?.ocr]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (settings.knowledge?.picture_descriptions !== undefined) {
|
||||||
|
setPictureDescriptions(settings.knowledge.picture_descriptions);
|
||||||
|
}
|
||||||
|
}, [settings.knowledge?.picture_descriptions]);
|
||||||
|
|
||||||
// Update model selection immediately
|
// Update model selection immediately
|
||||||
const handleModelChange = (newModel: string) => {
|
const handleModelChange = (newModel: string) => {
|
||||||
|
|
@ -231,11 +240,20 @@ function KnowledgeSourcesPage() {
|
||||||
debouncedUpdate({ chunk_overlap: numValue });
|
debouncedUpdate({ chunk_overlap: numValue });
|
||||||
};
|
};
|
||||||
|
|
||||||
// Update processing mode
|
// Update docling settings
|
||||||
const handleProcessingModeChange = (mode: string) => {
|
const handleTableStructureChange = (checked: boolean) => {
|
||||||
setProcessingMode(mode);
|
setTableStructure(checked);
|
||||||
// Update the configuration setting (backend will also update the flow automatically)
|
updateFlowSettingMutation.mutate({ table_structure: checked });
|
||||||
debouncedUpdate({ doclingPresets: mode });
|
};
|
||||||
|
|
||||||
|
const handleOcrChange = (checked: boolean) => {
|
||||||
|
setOcr(checked);
|
||||||
|
updateFlowSettingMutation.mutate({ ocr: checked });
|
||||||
|
};
|
||||||
|
|
||||||
|
const handlePictureDescriptionsChange = (checked: boolean) => {
|
||||||
|
setPictureDescriptions(checked);
|
||||||
|
updateFlowSettingMutation.mutate({ picture_descriptions: checked });
|
||||||
};
|
};
|
||||||
|
|
||||||
// Helper function to get connector icon
|
// Helper function to get connector icon
|
||||||
|
|
@ -569,7 +587,9 @@ function KnowledgeSourcesPage() {
|
||||||
// Only reset form values if the API call was successful
|
// Only reset form values if the API call was successful
|
||||||
setChunkSize(DEFAULT_KNOWLEDGE_SETTINGS.chunk_size);
|
setChunkSize(DEFAULT_KNOWLEDGE_SETTINGS.chunk_size);
|
||||||
setChunkOverlap(DEFAULT_KNOWLEDGE_SETTINGS.chunk_overlap);
|
setChunkOverlap(DEFAULT_KNOWLEDGE_SETTINGS.chunk_overlap);
|
||||||
setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode);
|
setTableStructure(false);
|
||||||
|
setOcr(false);
|
||||||
|
setPictureDescriptions(false);
|
||||||
closeDialog(); // Close after successful completion
|
closeDialog(); // Close after successful completion
|
||||||
})
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
|
|
@ -1063,76 +1083,61 @@ function KnowledgeSourcesPage() {
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="space-y-3">
|
<div className="">
|
||||||
<Label className="text-base font-medium">Ingestion presets</Label>
|
<div className="flex items-center justify-between py-3 border-b border-border">
|
||||||
<RadioGroup
|
<div className="flex-1">
|
||||||
value={processingMode}
|
<Label
|
||||||
onValueChange={handleProcessingModeChange}
|
htmlFor="table-structure"
|
||||||
className="space-y-3"
|
className="text-base font-medium cursor-pointer pb-3"
|
||||||
>
|
>
|
||||||
<div className="flex items-center space-x-3">
|
Table Structure
|
||||||
<RadioGroupItem value="standard" id="standard" />
|
</Label>
|
||||||
<div className="flex-1">
|
<div className="text-sm text-muted-foreground">
|
||||||
<Label
|
Capture table structure during ingest.
|
||||||
htmlFor="standard"
|
|
||||||
className="text-base font-medium cursor-pointer"
|
|
||||||
>
|
|
||||||
No OCR
|
|
||||||
</Label>
|
|
||||||
<div className="text-sm text-muted-foreground">
|
|
||||||
Fast ingest for documents with selectable text. Images are
|
|
||||||
ignored.
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center space-x-3">
|
<Switch
|
||||||
<RadioGroupItem value="ocr" id="ocr" />
|
id="table-structure"
|
||||||
<div className="flex-1">
|
checked={tableStructure}
|
||||||
<Label
|
onCheckedChange={handleTableStructureChange}
|
||||||
htmlFor="ocr"
|
/>
|
||||||
className="text-base font-medium cursor-pointer"
|
</div>
|
||||||
>
|
<div className="flex items-center justify-between py-3 border-b border-border">
|
||||||
OCR
|
<div className="flex-1">
|
||||||
</Label>
|
<Label
|
||||||
<div className="text-sm text-muted-foreground">
|
htmlFor="ocr"
|
||||||
Extracts text from images and scanned pages.
|
className="text-base font-medium cursor-pointer pb-3"
|
||||||
</div>
|
>
|
||||||
|
OCR
|
||||||
|
</Label>
|
||||||
|
<div className="text-sm text-muted-foreground">
|
||||||
|
Extracts text from images/PDFs. Ingest is slower when enabled.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center space-x-3">
|
<Switch
|
||||||
<RadioGroupItem
|
id="ocr"
|
||||||
value="picture_description"
|
checked={ocr}
|
||||||
id="picture_description"
|
onCheckedChange={handleOcrChange}
|
||||||
/>
|
/>
|
||||||
<div className="flex-1">
|
</div>
|
||||||
<Label
|
<div className="flex items-center justify-between py-3">
|
||||||
htmlFor="picture_description"
|
<div className="flex-1">
|
||||||
className="text-base font-medium cursor-pointer"
|
<Label
|
||||||
>
|
htmlFor="picture-descriptions"
|
||||||
OCR + Captions
|
className="text-base font-medium cursor-pointer pb-3"
|
||||||
</Label>
|
>
|
||||||
<div className="text-sm text-muted-foreground">
|
Picture Descriptions
|
||||||
Extracts text from images and scanned pages. Generates
|
</Label>
|
||||||
short image captions.
|
<div className="text-sm text-muted-foreground">
|
||||||
</div>
|
Adds captions for images. Ingest is slower when enabled.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center space-x-3">
|
<Switch
|
||||||
<RadioGroupItem value="VLM" id="VLM" />
|
id="picture-descriptions"
|
||||||
<div className="flex-1">
|
checked={pictureDescriptions}
|
||||||
<Label
|
onCheckedChange={handlePictureDescriptionsChange}
|
||||||
htmlFor="VLM"
|
/>
|
||||||
className="text-base font-medium cursor-pointer"
|
</div>
|
||||||
>
|
|
||||||
VLM
|
|
||||||
</Label>
|
|
||||||
<div className="text-sm text-muted-foreground">
|
|
||||||
Extracts text from layout-aware parsing of text, tables,
|
|
||||||
and sections.
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</RadioGroup>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</CardContent>
|
</CardContent>
|
||||||
|
|
|
||||||
|
|
@ -1,29 +0,0 @@
|
||||||
"use client"
|
|
||||||
|
|
||||||
import * as React from "react"
|
|
||||||
import * as SwitchPrimitives from "@radix-ui/react-switch"
|
|
||||||
|
|
||||||
import { cn } from "@/lib/utils"
|
|
||||||
|
|
||||||
const Switch = React.forwardRef<
|
|
||||||
React.ElementRef<typeof SwitchPrimitives.Root>,
|
|
||||||
React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
|
|
||||||
>(({ className, ...props }, ref) => (
|
|
||||||
<SwitchPrimitives.Root
|
|
||||||
className={cn(
|
|
||||||
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
|
|
||||||
className
|
|
||||||
)}
|
|
||||||
{...props}
|
|
||||||
ref={ref}
|
|
||||||
>
|
|
||||||
<SwitchPrimitives.Thumb
|
|
||||||
className={cn(
|
|
||||||
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
|
|
||||||
)}
|
|
||||||
/>
|
|
||||||
</SwitchPrimitives.Root>
|
|
||||||
))
|
|
||||||
Switch.displayName = SwitchPrimitives.Root.displayName
|
|
||||||
|
|
||||||
export { Switch }
|
|
||||||
|
|
@ -12,7 +12,9 @@ export const DEFAULT_AGENT_SETTINGS = {
|
||||||
export const DEFAULT_KNOWLEDGE_SETTINGS = {
|
export const DEFAULT_KNOWLEDGE_SETTINGS = {
|
||||||
chunk_size: 1000,
|
chunk_size: 1000,
|
||||||
chunk_overlap: 200,
|
chunk_overlap: 200,
|
||||||
processing_mode: "standard"
|
table_structure: false,
|
||||||
|
ocr: false,
|
||||||
|
picture_descriptions: false
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -17,35 +17,30 @@ logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# Docling preset configurations
|
# Docling preset configurations
|
||||||
def get_docling_preset_configs():
|
def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False):
|
||||||
"""Get docling preset configurations with platform-specific settings"""
|
"""Get docling preset configurations based on toggle settings
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table_structure: Enable table structure parsing (default: False)
|
||||||
|
ocr: Enable OCR for text extraction from images (default: False)
|
||||||
|
picture_descriptions: Enable picture descriptions/captions (default: False)
|
||||||
|
"""
|
||||||
is_macos = platform.system() == "Darwin"
|
is_macos = platform.system() == "Darwin"
|
||||||
|
|
||||||
return {
|
config = {
|
||||||
"standard": {"do_ocr": False},
|
"do_ocr": ocr,
|
||||||
"ocr": {"do_ocr": True, "ocr_engine": "ocrmac" if is_macos else "easyocr"},
|
"ocr_engine": "ocrmac" if is_macos else "easyocr",
|
||||||
"picture_description": {
|
"do_table_structure": table_structure,
|
||||||
"do_ocr": True,
|
"do_picture_classification": picture_descriptions,
|
||||||
"ocr_engine": "ocrmac" if is_macos else "easyocr",
|
"do_picture_description": picture_descriptions,
|
||||||
"do_picture_classification": True,
|
"picture_description_local": {
|
||||||
"do_picture_description": True,
|
"repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||||
"picture_description_local": {
|
"prompt": "Describe this image in a few sentences.",
|
||||||
"repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
|
}
|
||||||
"prompt": "Describe this image in a few sentences.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"VLM": {
|
|
||||||
"pipeline": "vlm",
|
|
||||||
"vlm_pipeline_model_local": {
|
|
||||||
"repo_id": "ds4sd/SmolDocling-256M-preview-mlx-bf16"
|
|
||||||
if is_macos
|
|
||||||
else "ds4sd/SmolDocling-256M-preview",
|
|
||||||
"response_format": "doctags",
|
|
||||||
"inference_framework": "mlx",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return config
|
||||||
|
|
||||||
|
|
||||||
async def get_settings(request, session_manager):
|
async def get_settings(request, session_manager):
|
||||||
"""Get application settings"""
|
"""Get application settings"""
|
||||||
|
|
@ -71,7 +66,9 @@ async def get_settings(request, session_manager):
|
||||||
"embedding_model": knowledge_config.embedding_model,
|
"embedding_model": knowledge_config.embedding_model,
|
||||||
"chunk_size": knowledge_config.chunk_size,
|
"chunk_size": knowledge_config.chunk_size,
|
||||||
"chunk_overlap": knowledge_config.chunk_overlap,
|
"chunk_overlap": knowledge_config.chunk_overlap,
|
||||||
"doclingPresets": knowledge_config.doclingPresets,
|
"table_structure": knowledge_config.table_structure,
|
||||||
|
"ocr": knowledge_config.ocr,
|
||||||
|
"picture_descriptions": knowledge_config.picture_descriptions,
|
||||||
},
|
},
|
||||||
"agent": {
|
"agent": {
|
||||||
"llm_model": agent_config.llm_model,
|
"llm_model": agent_config.llm_model,
|
||||||
|
|
@ -178,7 +175,9 @@ async def update_settings(request, session_manager):
|
||||||
"system_prompt",
|
"system_prompt",
|
||||||
"chunk_size",
|
"chunk_size",
|
||||||
"chunk_overlap",
|
"chunk_overlap",
|
||||||
"doclingPresets",
|
"table_structure",
|
||||||
|
"ocr",
|
||||||
|
"picture_descriptions",
|
||||||
"embedding_model",
|
"embedding_model",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -255,32 +254,68 @@ async def update_settings(request, session_manager):
|
||||||
# Don't fail the entire settings update if flow update fails
|
# Don't fail the entire settings update if flow update fails
|
||||||
# The config will still be saved
|
# The config will still be saved
|
||||||
|
|
||||||
if "doclingPresets" in body:
|
if "table_structure" in body:
|
||||||
preset_configs = get_docling_preset_configs()
|
if not isinstance(body["table_structure"], bool):
|
||||||
valid_presets = list(preset_configs.keys())
|
|
||||||
if body["doclingPresets"] not in valid_presets:
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{
|
{"error": "table_structure must be a boolean"}, status_code=400
|
||||||
"error": f"doclingPresets must be one of: {', '.join(valid_presets)}"
|
|
||||||
},
|
|
||||||
status_code=400,
|
|
||||||
)
|
)
|
||||||
current_config.knowledge.doclingPresets = body["doclingPresets"]
|
current_config.knowledge.table_structure = body["table_structure"]
|
||||||
config_updated = True
|
config_updated = True
|
||||||
|
|
||||||
# Also update the flow with the new docling preset
|
# Also update the flow with the new docling settings
|
||||||
try:
|
try:
|
||||||
flows_service = _get_flows_service()
|
flows_service = _get_flows_service()
|
||||||
await flows_service.update_flow_docling_preset(
|
preset_config = get_docling_preset_configs(
|
||||||
body["doclingPresets"], preset_configs[body["doclingPresets"]]
|
table_structure=body["table_structure"],
|
||||||
)
|
ocr=current_config.knowledge.ocr,
|
||||||
logger.info(
|
picture_descriptions=current_config.knowledge.picture_descriptions
|
||||||
f"Successfully updated docling preset in flow to '{body['doclingPresets']}'"
|
|
||||||
)
|
)
|
||||||
|
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||||
|
logger.info(f"Successfully updated table_structure setting in flow")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to update docling preset in flow: {str(e)}")
|
logger.error(f"Failed to update docling settings in flow: {str(e)}")
|
||||||
# Don't fail the entire settings update if flow update fails
|
|
||||||
# The config will still be saved
|
if "ocr" in body:
|
||||||
|
if not isinstance(body["ocr"], bool):
|
||||||
|
return JSONResponse(
|
||||||
|
{"error": "ocr must be a boolean"}, status_code=400
|
||||||
|
)
|
||||||
|
current_config.knowledge.ocr = body["ocr"]
|
||||||
|
config_updated = True
|
||||||
|
|
||||||
|
# Also update the flow with the new docling settings
|
||||||
|
try:
|
||||||
|
flows_service = _get_flows_service()
|
||||||
|
preset_config = get_docling_preset_configs(
|
||||||
|
table_structure=current_config.knowledge.table_structure,
|
||||||
|
ocr=body["ocr"],
|
||||||
|
picture_descriptions=current_config.knowledge.picture_descriptions
|
||||||
|
)
|
||||||
|
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||||
|
logger.info(f"Successfully updated ocr setting in flow")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to update docling settings in flow: {str(e)}")
|
||||||
|
|
||||||
|
if "picture_descriptions" in body:
|
||||||
|
if not isinstance(body["picture_descriptions"], bool):
|
||||||
|
return JSONResponse(
|
||||||
|
{"error": "picture_descriptions must be a boolean"}, status_code=400
|
||||||
|
)
|
||||||
|
current_config.knowledge.picture_descriptions = body["picture_descriptions"]
|
||||||
|
config_updated = True
|
||||||
|
|
||||||
|
# Also update the flow with the new docling settings
|
||||||
|
try:
|
||||||
|
flows_service = _get_flows_service()
|
||||||
|
preset_config = get_docling_preset_configs(
|
||||||
|
table_structure=current_config.knowledge.table_structure,
|
||||||
|
ocr=current_config.knowledge.ocr,
|
||||||
|
picture_descriptions=body["picture_descriptions"]
|
||||||
|
)
|
||||||
|
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||||
|
logger.info(f"Successfully updated picture_descriptions setting in flow")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to update docling settings in flow: {str(e)}")
|
||||||
|
|
||||||
if "chunk_size" in body:
|
if "chunk_size" in body:
|
||||||
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
|
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
|
||||||
|
|
@ -624,48 +659,56 @@ def _get_flows_service():
|
||||||
|
|
||||||
|
|
||||||
async def update_docling_preset(request, session_manager):
|
async def update_docling_preset(request, session_manager):
|
||||||
"""Update docling preset in the ingest flow"""
|
"""Update docling settings in the ingest flow - deprecated endpoint, use /settings instead"""
|
||||||
try:
|
try:
|
||||||
# Parse request body
|
# Parse request body
|
||||||
body = await request.json()
|
body = await request.json()
|
||||||
|
|
||||||
# Validate preset parameter
|
# Support old preset-based API for backwards compatibility
|
||||||
if "preset" not in body:
|
if "preset" in body:
|
||||||
return JSONResponse(
|
# Map old presets to new toggle settings
|
||||||
{"error": "preset parameter is required"}, status_code=400
|
preset_map = {
|
||||||
)
|
"standard": {"table_structure": False, "ocr": False, "picture_descriptions": False},
|
||||||
|
"ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False},
|
||||||
|
"picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True},
|
||||||
|
"VLM": {"table_structure": False, "ocr": False, "picture_descriptions": False},
|
||||||
|
}
|
||||||
|
|
||||||
preset = body["preset"]
|
preset = body["preset"]
|
||||||
preset_configs = get_docling_preset_configs()
|
if preset not in preset_map:
|
||||||
|
return JSONResponse(
|
||||||
|
{"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_map.keys())}"},
|
||||||
|
status_code=400,
|
||||||
|
)
|
||||||
|
|
||||||
if preset not in preset_configs:
|
settings = preset_map[preset]
|
||||||
valid_presets = list(preset_configs.keys())
|
else:
|
||||||
return JSONResponse(
|
# Support new toggle-based API
|
||||||
{
|
settings = {
|
||||||
"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"
|
"table_structure": body.get("table_structure", False),
|
||||||
},
|
"ocr": body.get("ocr", False),
|
||||||
status_code=400,
|
"picture_descriptions": body.get("picture_descriptions", False),
|
||||||
)
|
}
|
||||||
|
|
||||||
# Get the preset configuration
|
# Get the preset configuration
|
||||||
preset_config = preset_configs[preset]
|
preset_config = get_docling_preset_configs(**settings)
|
||||||
|
|
||||||
# Use the helper function to update the flow
|
# Use the helper function to update the flow
|
||||||
flows_service = _get_flows_service()
|
flows_service = _get_flows_service()
|
||||||
await flows_service.update_flow_docling_preset(preset, preset_config)
|
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||||
|
|
||||||
logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
|
logger.info(f"Successfully updated docling settings in ingest flow")
|
||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{
|
{
|
||||||
"message": f"Successfully updated docling preset to '{preset}'",
|
"message": f"Successfully updated docling settings",
|
||||||
"preset": preset,
|
"settings": settings,
|
||||||
"preset_config": preset_config,
|
"preset_config": preset_config,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Failed to update docling preset", error=str(e))
|
logger.error("Failed to update docling settings", error=str(e))
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{"error": f"Failed to update docling preset: {str(e)}"}, status_code=500
|
{"error": f"Failed to update docling settings: {str(e)}"}, status_code=500
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,9 @@ class KnowledgeConfig:
|
||||||
embedding_model: str = "text-embedding-3-small"
|
embedding_model: str = "text-embedding-3-small"
|
||||||
chunk_size: int = 1000
|
chunk_size: int = 1000
|
||||||
chunk_overlap: int = 200
|
chunk_overlap: int = 200
|
||||||
doclingPresets: str = "standard"
|
table_structure: bool = False
|
||||||
|
ocr: bool = False
|
||||||
|
picture_descriptions: bool = False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue