Merge branch 'main' into feat/filters-design-sweep
This commit is contained in:
commit
f6ae18f2bd
12 changed files with 322 additions and 212 deletions
|
|
@ -138,7 +138,7 @@ podman machine start
|
|||
|
||||
### Common Issues
|
||||
|
||||
See common issues and fixes: [docs/reference/troubleshooting.mdx](docs/docs/reference/troubleshooting.mdx)
|
||||
See common issues and fixes: [docs/support/troubleshoot.mdx](docs/docs/support/troubleshoot.mdx)
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,24 +0,0 @@
|
|||
---
|
||||
title: Troubleshooting
|
||||
slug: /reference/troubleshooting
|
||||
---
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
## Podman on macOS
|
||||
|
||||
If using Podman on macOS, you may need to increase VM memory:
|
||||
|
||||
```bash
|
||||
podman machine stop
|
||||
podman machine rm
|
||||
podman machine init --memory 8192 # 8 GB example
|
||||
podman machine start
|
||||
```
|
||||
|
||||
## Common Issues
|
||||
|
||||
1. OpenSearch fails to start: Check that `OPENSEARCH_PASSWORD` is set and meets requirements
|
||||
2. Langflow connection issues: Verify `LANGFLOW_SUPERUSER` credentials are correct
|
||||
3. Out of memory errors: Increase Docker memory allocation or use CPU-only mode
|
||||
4. Port conflicts: Ensure ports 3000, 7860, 8000, 9200, 5601 are available
|
||||
107
docs/docs/support/troubleshoot.mdx
Normal file
107
docs/docs/support/troubleshoot.mdx
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
---
|
||||
title: Troubleshoot
|
||||
slug: /support/troubleshoot
|
||||
---
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.
|
||||
|
||||
## OpenSearch fails to start
|
||||
|
||||
Check that `OPENSEARCH_PASSWORD` is set and meets requirements.
|
||||
The password must be strong: it must contain at least 8 characters, including at least one uppercase letter, one lowercase letter, one digit, and one special character.
|
||||
|
||||
## Langflow connection issues
|
||||
|
||||
Verify the `LANGFLOW_SUPERUSER` credentials are correct.
|
||||
|
||||
## Memory errors
|
||||
|
||||
### Container out of memory errors
|
||||
|
||||
Increase Docker memory allocation or use [docker-compose-cpu.yml](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) to deploy OpenRAG.
|
||||
|
||||
### Podman on macOS memory issues
|
||||
|
||||
If you're using Podman on macOS, you may need to increase VM memory on your Podman machine.
|
||||
This example increases the machine size to 8 GB of RAM, which should be sufficient to run OpenRAG.
|
||||
```bash
|
||||
podman machine stop
|
||||
podman machine rm
|
||||
podman machine init --memory 8192 # 8 GB example
|
||||
podman machine start
|
||||
```
|
||||
|
||||
## Port conflicts
|
||||
|
||||
Ensure that ports 3000, 7860, 8000, 9200, and 5601 are available.
|
||||
|
||||
## Langflow container already exists
|
||||
|
||||
If you are running other versions of Langflow containers on your machine, you may encounter an issue where Docker or Podman thinks Langflow is already up.
|
||||
|
||||
Remove just the problem container, or clean up all containers and start fresh.
|
||||
|
||||
To reset your local containers and pull new images, do the following:
|
||||
|
||||
1. Stop your containers and completely remove them.
|
||||
|
||||
<Tabs groupId="Container software">
|
||||
<TabItem value="Docker" label="Docker" default>
|
||||
|
||||
```bash
|
||||
# Stop all running containers
|
||||
docker stop $(docker ps -q)
|
||||
|
||||
# Remove all containers (including stopped ones)
|
||||
docker rm --force $(docker ps -aq)
|
||||
|
||||
# Remove all images
|
||||
docker rmi --force $(docker images -q)
|
||||
|
||||
# Remove all volumes
|
||||
docker volume prune --force
|
||||
|
||||
# Remove all networks (except default)
|
||||
docker network prune --force
|
||||
|
||||
# Clean up any leftover data
|
||||
docker system prune --all --force --volumes
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="Podman" label="Podman">
|
||||
|
||||
```bash
|
||||
# Stop all running containers
|
||||
podman stop --all
|
||||
|
||||
# Remove all containers (including stopped ones)
|
||||
podman rm --all --force
|
||||
|
||||
# Remove all images
|
||||
podman rmi --all --force
|
||||
|
||||
# Remove all volumes
|
||||
podman volume prune --force
|
||||
|
||||
# Remove all networks (except default)
|
||||
podman network prune --force
|
||||
|
||||
# Clean up any leftover data
|
||||
podman system prune --all --force --volumes
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
2. Restart OpenRAG and upgrade to get the latest images for your containers.
|
||||
```bash
|
||||
uv run openrag
|
||||
```
|
||||
|
||||
3. In the OpenRAG TUI, click **Status**, and then click **Upgrade**.
|
||||
When the **Close** button is active, the upgrade is complete.
|
||||
Close the window and open the OpenRAG application.
|
||||
|
|
@ -76,12 +76,12 @@ const sidebars = {
|
|||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Reference",
|
||||
label: "Support",
|
||||
items: [
|
||||
{
|
||||
type: "doc",
|
||||
id: "reference/troubleshooting",
|
||||
label: "Troubleshooting"
|
||||
id: "support/troubleshoot",
|
||||
label: "Troubleshoot"
|
||||
},
|
||||
],
|
||||
},
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ const Switch = React.forwardRef<
|
|||
>(({ className, ...props }, ref) => (
|
||||
<SwitchPrimitives.Root
|
||||
className={cn(
|
||||
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
|
||||
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-muted",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
|
|
@ -19,7 +19,7 @@ const Switch = React.forwardRef<
|
|||
>
|
||||
<SwitchPrimitives.Thumb
|
||||
className={cn(
|
||||
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
|
||||
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0 data-[state=unchecked]:bg-primary"
|
||||
)}
|
||||
/>
|
||||
</SwitchPrimitives.Root>
|
||||
|
|
|
|||
|
|
@ -8,7 +8,9 @@ interface UpdateFlowSettingVariables {
|
|||
llm_model?: string;
|
||||
system_prompt?: string;
|
||||
embedding_model?: string;
|
||||
doclingPresets?: string;
|
||||
table_structure?: boolean;
|
||||
ocr?: boolean;
|
||||
picture_descriptions?: boolean;
|
||||
chunk_size?: number;
|
||||
chunk_overlap?: number;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,9 @@ export interface KnowledgeSettings {
|
|||
embedding_model?: string;
|
||||
chunk_size?: number;
|
||||
chunk_overlap?: number;
|
||||
doclingPresets?: string;
|
||||
table_structure?: boolean;
|
||||
ocr?: boolean;
|
||||
picture_descriptions?: boolean;
|
||||
}
|
||||
|
||||
export interface Settings {
|
||||
|
|
|
|||
|
|
@ -22,9 +22,9 @@ import {
|
|||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { Checkbox } from "@/components/ui/checkbox";
|
||||
import { Switch } from "@/components/ui/switch";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
|
|
@ -39,11 +39,6 @@ import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from
|
|||
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
|
||||
import { ModelSelectItems } from "./helpers/model-select-item";
|
||||
import { LabelWrapper } from "@/components/label-wrapper";
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipTrigger,
|
||||
} from "@radix-ui/react-tooltip";
|
||||
|
||||
const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS;
|
||||
|
||||
|
|
@ -112,7 +107,9 @@ function KnowledgeSourcesPage() {
|
|||
const [systemPrompt, setSystemPrompt] = useState<string>("");
|
||||
const [chunkSize, setChunkSize] = useState<number>(1024);
|
||||
const [chunkOverlap, setChunkOverlap] = useState<number>(50);
|
||||
const [processingMode, setProcessingMode] = useState<string>("standard");
|
||||
const [tableStructure, setTableStructure] = useState<boolean>(false);
|
||||
const [ocr, setOcr] = useState<boolean>(false);
|
||||
const [pictureDescriptions, setPictureDescriptions] = useState<boolean>(false);
|
||||
|
||||
// Fetch settings using React Query
|
||||
const { data: settings = {} } = useGetSettingsQuery({
|
||||
|
|
@ -195,12 +192,24 @@ function KnowledgeSourcesPage() {
|
|||
}
|
||||
}, [settings.knowledge?.chunk_overlap]);
|
||||
|
||||
// Sync processing mode with settings data
|
||||
// Sync docling settings with settings data
|
||||
useEffect(() => {
|
||||
if (settings.knowledge?.doclingPresets) {
|
||||
setProcessingMode(settings.knowledge.doclingPresets);
|
||||
if (settings.knowledge?.table_structure !== undefined) {
|
||||
setTableStructure(settings.knowledge.table_structure);
|
||||
}
|
||||
}, [settings.knowledge?.doclingPresets]);
|
||||
}, [settings.knowledge?.table_structure]);
|
||||
|
||||
useEffect(() => {
|
||||
if (settings.knowledge?.ocr !== undefined) {
|
||||
setOcr(settings.knowledge.ocr);
|
||||
}
|
||||
}, [settings.knowledge?.ocr]);
|
||||
|
||||
useEffect(() => {
|
||||
if (settings.knowledge?.picture_descriptions !== undefined) {
|
||||
setPictureDescriptions(settings.knowledge.picture_descriptions);
|
||||
}
|
||||
}, [settings.knowledge?.picture_descriptions]);
|
||||
|
||||
// Update model selection immediately
|
||||
const handleModelChange = (newModel: string) => {
|
||||
|
|
@ -231,11 +240,20 @@ function KnowledgeSourcesPage() {
|
|||
debouncedUpdate({ chunk_overlap: numValue });
|
||||
};
|
||||
|
||||
// Update processing mode
|
||||
const handleProcessingModeChange = (mode: string) => {
|
||||
setProcessingMode(mode);
|
||||
// Update the configuration setting (backend will also update the flow automatically)
|
||||
debouncedUpdate({ doclingPresets: mode });
|
||||
// Update docling settings
|
||||
const handleTableStructureChange = (checked: boolean) => {
|
||||
setTableStructure(checked);
|
||||
updateFlowSettingMutation.mutate({ table_structure: checked });
|
||||
};
|
||||
|
||||
const handleOcrChange = (checked: boolean) => {
|
||||
setOcr(checked);
|
||||
updateFlowSettingMutation.mutate({ ocr: checked });
|
||||
};
|
||||
|
||||
const handlePictureDescriptionsChange = (checked: boolean) => {
|
||||
setPictureDescriptions(checked);
|
||||
updateFlowSettingMutation.mutate({ picture_descriptions: checked });
|
||||
};
|
||||
|
||||
// Helper function to get connector icon
|
||||
|
|
@ -569,7 +587,9 @@ function KnowledgeSourcesPage() {
|
|||
// Only reset form values if the API call was successful
|
||||
setChunkSize(DEFAULT_KNOWLEDGE_SETTINGS.chunk_size);
|
||||
setChunkOverlap(DEFAULT_KNOWLEDGE_SETTINGS.chunk_overlap);
|
||||
setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode);
|
||||
setTableStructure(false);
|
||||
setOcr(false);
|
||||
setPictureDescriptions(false);
|
||||
closeDialog(); // Close after successful completion
|
||||
})
|
||||
.catch((error) => {
|
||||
|
|
@ -1063,76 +1083,61 @@ function KnowledgeSourcesPage() {
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="space-y-3">
|
||||
<Label className="text-base font-medium">Ingestion presets</Label>
|
||||
<RadioGroup
|
||||
value={processingMode}
|
||||
onValueChange={handleProcessingModeChange}
|
||||
className="space-y-3"
|
||||
>
|
||||
<div className="flex items-center space-x-3">
|
||||
<RadioGroupItem value="standard" id="standard" />
|
||||
<div className="flex-1">
|
||||
<Label
|
||||
htmlFor="standard"
|
||||
className="text-base font-medium cursor-pointer"
|
||||
>
|
||||
No OCR
|
||||
</Label>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
Fast ingest for documents with selectable text. Images are
|
||||
ignored.
|
||||
</div>
|
||||
<div className="">
|
||||
<div className="flex items-center justify-between py-3 border-b border-border">
|
||||
<div className="flex-1">
|
||||
<Label
|
||||
htmlFor="table-structure"
|
||||
className="text-base font-medium cursor-pointer pb-3"
|
||||
>
|
||||
Table Structure
|
||||
</Label>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
Capture table structure during ingest.
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center space-x-3">
|
||||
<RadioGroupItem value="ocr" id="ocr" />
|
||||
<div className="flex-1">
|
||||
<Label
|
||||
htmlFor="ocr"
|
||||
className="text-base font-medium cursor-pointer"
|
||||
>
|
||||
OCR
|
||||
</Label>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
Extracts text from images and scanned pages.
|
||||
</div>
|
||||
<Switch
|
||||
id="table-structure"
|
||||
checked={tableStructure}
|
||||
onCheckedChange={handleTableStructureChange}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center justify-between py-3 border-b border-border">
|
||||
<div className="flex-1">
|
||||
<Label
|
||||
htmlFor="ocr"
|
||||
className="text-base font-medium cursor-pointer pb-3"
|
||||
>
|
||||
OCR
|
||||
</Label>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
Extracts text from images/PDFs. Ingest is slower when enabled.
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center space-x-3">
|
||||
<RadioGroupItem
|
||||
value="picture_description"
|
||||
id="picture_description"
|
||||
/>
|
||||
<div className="flex-1">
|
||||
<Label
|
||||
htmlFor="picture_description"
|
||||
className="text-base font-medium cursor-pointer"
|
||||
>
|
||||
OCR + Captions
|
||||
</Label>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
Extracts text from images and scanned pages. Generates
|
||||
short image captions.
|
||||
</div>
|
||||
<Switch
|
||||
id="ocr"
|
||||
checked={ocr}
|
||||
onCheckedChange={handleOcrChange}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center justify-between py-3">
|
||||
<div className="flex-1">
|
||||
<Label
|
||||
htmlFor="picture-descriptions"
|
||||
className="text-base font-medium cursor-pointer pb-3"
|
||||
>
|
||||
Picture Descriptions
|
||||
</Label>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
Adds captions for images. Ingest is slower when enabled.
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center space-x-3">
|
||||
<RadioGroupItem value="VLM" id="VLM" />
|
||||
<div className="flex-1">
|
||||
<Label
|
||||
htmlFor="VLM"
|
||||
className="text-base font-medium cursor-pointer"
|
||||
>
|
||||
VLM
|
||||
</Label>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
Extracts text from layout-aware parsing of text, tables,
|
||||
and sections.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</RadioGroup>
|
||||
<Switch
|
||||
id="picture-descriptions"
|
||||
checked={pictureDescriptions}
|
||||
onCheckedChange={handlePictureDescriptionsChange}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
|
|
|
|||
|
|
@ -1,29 +0,0 @@
|
|||
"use client"
|
||||
|
||||
import * as React from "react"
|
||||
import * as SwitchPrimitives from "@radix-ui/react-switch"
|
||||
|
||||
import { cn } from "@/lib/utils"
|
||||
|
||||
const Switch = React.forwardRef<
|
||||
React.ElementRef<typeof SwitchPrimitives.Root>,
|
||||
React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
|
||||
>(({ className, ...props }, ref) => (
|
||||
<SwitchPrimitives.Root
|
||||
className={cn(
|
||||
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
ref={ref}
|
||||
>
|
||||
<SwitchPrimitives.Thumb
|
||||
className={cn(
|
||||
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
|
||||
)}
|
||||
/>
|
||||
</SwitchPrimitives.Root>
|
||||
))
|
||||
Switch.displayName = SwitchPrimitives.Root.displayName
|
||||
|
||||
export { Switch }
|
||||
|
|
@ -12,7 +12,9 @@ export const DEFAULT_AGENT_SETTINGS = {
|
|||
export const DEFAULT_KNOWLEDGE_SETTINGS = {
|
||||
chunk_size: 1000,
|
||||
chunk_overlap: 200,
|
||||
processing_mode: "standard"
|
||||
table_structure: false,
|
||||
ocr: false,
|
||||
picture_descriptions: false
|
||||
} as const;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -17,35 +17,30 @@ logger = get_logger(__name__)
|
|||
|
||||
|
||||
# Docling preset configurations
|
||||
def get_docling_preset_configs():
|
||||
"""Get docling preset configurations with platform-specific settings"""
|
||||
def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False):
|
||||
"""Get docling preset configurations based on toggle settings
|
||||
|
||||
Args:
|
||||
table_structure: Enable table structure parsing (default: False)
|
||||
ocr: Enable OCR for text extraction from images (default: False)
|
||||
picture_descriptions: Enable picture descriptions/captions (default: False)
|
||||
"""
|
||||
is_macos = platform.system() == "Darwin"
|
||||
|
||||
return {
|
||||
"standard": {"do_ocr": False},
|
||||
"ocr": {"do_ocr": True, "ocr_engine": "ocrmac" if is_macos else "easyocr"},
|
||||
"picture_description": {
|
||||
"do_ocr": True,
|
||||
"ocr_engine": "ocrmac" if is_macos else "easyocr",
|
||||
"do_picture_classification": True,
|
||||
"do_picture_description": True,
|
||||
"picture_description_local": {
|
||||
"repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||
"prompt": "Describe this image in a few sentences.",
|
||||
},
|
||||
},
|
||||
"VLM": {
|
||||
"pipeline": "vlm",
|
||||
"vlm_pipeline_model_local": {
|
||||
"repo_id": "ds4sd/SmolDocling-256M-preview-mlx-bf16"
|
||||
if is_macos
|
||||
else "ds4sd/SmolDocling-256M-preview",
|
||||
"response_format": "doctags",
|
||||
"inference_framework": "mlx",
|
||||
},
|
||||
},
|
||||
config = {
|
||||
"do_ocr": ocr,
|
||||
"ocr_engine": "ocrmac" if is_macos else "easyocr",
|
||||
"do_table_structure": table_structure,
|
||||
"do_picture_classification": picture_descriptions,
|
||||
"do_picture_description": picture_descriptions,
|
||||
"picture_description_local": {
|
||||
"repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
|
||||
"prompt": "Describe this image in a few sentences.",
|
||||
}
|
||||
}
|
||||
|
||||
return config
|
||||
|
||||
|
||||
async def get_settings(request, session_manager):
|
||||
"""Get application settings"""
|
||||
|
|
@ -71,7 +66,9 @@ async def get_settings(request, session_manager):
|
|||
"embedding_model": knowledge_config.embedding_model,
|
||||
"chunk_size": knowledge_config.chunk_size,
|
||||
"chunk_overlap": knowledge_config.chunk_overlap,
|
||||
"doclingPresets": knowledge_config.doclingPresets,
|
||||
"table_structure": knowledge_config.table_structure,
|
||||
"ocr": knowledge_config.ocr,
|
||||
"picture_descriptions": knowledge_config.picture_descriptions,
|
||||
},
|
||||
"agent": {
|
||||
"llm_model": agent_config.llm_model,
|
||||
|
|
@ -178,7 +175,9 @@ async def update_settings(request, session_manager):
|
|||
"system_prompt",
|
||||
"chunk_size",
|
||||
"chunk_overlap",
|
||||
"doclingPresets",
|
||||
"table_structure",
|
||||
"ocr",
|
||||
"picture_descriptions",
|
||||
"embedding_model",
|
||||
}
|
||||
|
||||
|
|
@ -255,32 +254,68 @@ async def update_settings(request, session_manager):
|
|||
# Don't fail the entire settings update if flow update fails
|
||||
# The config will still be saved
|
||||
|
||||
if "doclingPresets" in body:
|
||||
preset_configs = get_docling_preset_configs()
|
||||
valid_presets = list(preset_configs.keys())
|
||||
if body["doclingPresets"] not in valid_presets:
|
||||
if "table_structure" in body:
|
||||
if not isinstance(body["table_structure"], bool):
|
||||
return JSONResponse(
|
||||
{
|
||||
"error": f"doclingPresets must be one of: {', '.join(valid_presets)}"
|
||||
},
|
||||
status_code=400,
|
||||
{"error": "table_structure must be a boolean"}, status_code=400
|
||||
)
|
||||
current_config.knowledge.doclingPresets = body["doclingPresets"]
|
||||
current_config.knowledge.table_structure = body["table_structure"]
|
||||
config_updated = True
|
||||
|
||||
# Also update the flow with the new docling preset
|
||||
# Also update the flow with the new docling settings
|
||||
try:
|
||||
flows_service = _get_flows_service()
|
||||
await flows_service.update_flow_docling_preset(
|
||||
body["doclingPresets"], preset_configs[body["doclingPresets"]]
|
||||
)
|
||||
logger.info(
|
||||
f"Successfully updated docling preset in flow to '{body['doclingPresets']}'"
|
||||
preset_config = get_docling_preset_configs(
|
||||
table_structure=body["table_structure"],
|
||||
ocr=current_config.knowledge.ocr,
|
||||
picture_descriptions=current_config.knowledge.picture_descriptions
|
||||
)
|
||||
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||
logger.info(f"Successfully updated table_structure setting in flow")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update docling preset in flow: {str(e)}")
|
||||
# Don't fail the entire settings update if flow update fails
|
||||
# The config will still be saved
|
||||
logger.error(f"Failed to update docling settings in flow: {str(e)}")
|
||||
|
||||
if "ocr" in body:
|
||||
if not isinstance(body["ocr"], bool):
|
||||
return JSONResponse(
|
||||
{"error": "ocr must be a boolean"}, status_code=400
|
||||
)
|
||||
current_config.knowledge.ocr = body["ocr"]
|
||||
config_updated = True
|
||||
|
||||
# Also update the flow with the new docling settings
|
||||
try:
|
||||
flows_service = _get_flows_service()
|
||||
preset_config = get_docling_preset_configs(
|
||||
table_structure=current_config.knowledge.table_structure,
|
||||
ocr=body["ocr"],
|
||||
picture_descriptions=current_config.knowledge.picture_descriptions
|
||||
)
|
||||
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||
logger.info(f"Successfully updated ocr setting in flow")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update docling settings in flow: {str(e)}")
|
||||
|
||||
if "picture_descriptions" in body:
|
||||
if not isinstance(body["picture_descriptions"], bool):
|
||||
return JSONResponse(
|
||||
{"error": "picture_descriptions must be a boolean"}, status_code=400
|
||||
)
|
||||
current_config.knowledge.picture_descriptions = body["picture_descriptions"]
|
||||
config_updated = True
|
||||
|
||||
# Also update the flow with the new docling settings
|
||||
try:
|
||||
flows_service = _get_flows_service()
|
||||
preset_config = get_docling_preset_configs(
|
||||
table_structure=current_config.knowledge.table_structure,
|
||||
ocr=current_config.knowledge.ocr,
|
||||
picture_descriptions=body["picture_descriptions"]
|
||||
)
|
||||
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||
logger.info(f"Successfully updated picture_descriptions setting in flow")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update docling settings in flow: {str(e)}")
|
||||
|
||||
if "chunk_size" in body:
|
||||
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
|
||||
|
|
@ -624,48 +659,56 @@ def _get_flows_service():
|
|||
|
||||
|
||||
async def update_docling_preset(request, session_manager):
|
||||
"""Update docling preset in the ingest flow"""
|
||||
"""Update docling settings in the ingest flow - deprecated endpoint, use /settings instead"""
|
||||
try:
|
||||
# Parse request body
|
||||
body = await request.json()
|
||||
|
||||
# Validate preset parameter
|
||||
if "preset" not in body:
|
||||
return JSONResponse(
|
||||
{"error": "preset parameter is required"}, status_code=400
|
||||
)
|
||||
# Support old preset-based API for backwards compatibility
|
||||
if "preset" in body:
|
||||
# Map old presets to new toggle settings
|
||||
preset_map = {
|
||||
"standard": {"table_structure": False, "ocr": False, "picture_descriptions": False},
|
||||
"ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False},
|
||||
"picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True},
|
||||
"VLM": {"table_structure": False, "ocr": False, "picture_descriptions": False},
|
||||
}
|
||||
|
||||
preset = body["preset"]
|
||||
preset_configs = get_docling_preset_configs()
|
||||
preset = body["preset"]
|
||||
if preset not in preset_map:
|
||||
return JSONResponse(
|
||||
{"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_map.keys())}"},
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
if preset not in preset_configs:
|
||||
valid_presets = list(preset_configs.keys())
|
||||
return JSONResponse(
|
||||
{
|
||||
"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"
|
||||
},
|
||||
status_code=400,
|
||||
)
|
||||
settings = preset_map[preset]
|
||||
else:
|
||||
# Support new toggle-based API
|
||||
settings = {
|
||||
"table_structure": body.get("table_structure", False),
|
||||
"ocr": body.get("ocr", False),
|
||||
"picture_descriptions": body.get("picture_descriptions", False),
|
||||
}
|
||||
|
||||
# Get the preset configuration
|
||||
preset_config = preset_configs[preset]
|
||||
preset_config = get_docling_preset_configs(**settings)
|
||||
|
||||
# Use the helper function to update the flow
|
||||
flows_service = _get_flows_service()
|
||||
await flows_service.update_flow_docling_preset(preset, preset_config)
|
||||
await flows_service.update_flow_docling_preset("custom", preset_config)
|
||||
|
||||
logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
|
||||
logger.info(f"Successfully updated docling settings in ingest flow")
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"message": f"Successfully updated docling preset to '{preset}'",
|
||||
"preset": preset,
|
||||
"message": f"Successfully updated docling settings",
|
||||
"settings": settings,
|
||||
"preset_config": preset_config,
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to update docling preset", error=str(e))
|
||||
logger.error("Failed to update docling settings", error=str(e))
|
||||
return JSONResponse(
|
||||
{"error": f"Failed to update docling preset: {str(e)}"}, status_code=500
|
||||
{"error": f"Failed to update docling settings: {str(e)}"}, status_code=500
|
||||
)
|
||||
|
|
|
|||
|
|
@ -27,7 +27,9 @@ class KnowledgeConfig:
|
|||
embedding_model: str = "text-embedding-3-small"
|
||||
chunk_size: int = 1000
|
||||
chunk_overlap: int = 200
|
||||
doclingPresets: str = "standard"
|
||||
table_structure: bool = False
|
||||
ocr: bool = False
|
||||
picture_descriptions: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue