Merge branch 'main' into feat/filters-design-sweep

This commit is contained in:
Cole Goldsmith 2025-09-30 13:55:07 -05:00 committed by GitHub
commit f6ae18f2bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 322 additions and 212 deletions

View file

@ -138,7 +138,7 @@ podman machine start
### Common Issues
See common issues and fixes: [docs/reference/troubleshooting.mdx](docs/docs/reference/troubleshooting.mdx)
See common issues and fixes: [docs/support/troubleshoot.mdx](docs/docs/support/troubleshoot.mdx)

View file

@ -1,24 +0,0 @@
---
title: Troubleshooting
slug: /reference/troubleshooting
---
# Troubleshooting
## Podman on macOS
If using Podman on macOS, you may need to increase VM memory:
```bash
podman machine stop
podman machine rm
podman machine init --memory 8192 # 8 GB example
podman machine start
```
## Common Issues
1. OpenSearch fails to start: Check that `OPENSEARCH_PASSWORD` is set and meets requirements
2. Langflow connection issues: Verify `LANGFLOW_SUPERUSER` credentials are correct
3. Out of memory errors: Increase Docker memory allocation or use CPU-only mode
4. Port conflicts: Ensure ports 3000, 7860, 8000, 9200, 5601 are available

View file

@ -0,0 +1,107 @@
---
title: Troubleshoot
slug: /support/troubleshoot
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG.
## OpenSearch fails to start
Check that `OPENSEARCH_PASSWORD` is set and meets requirements.
The password must be strong: at least 8 characters long, with at least one uppercase letter, one lowercase letter, one digit, and one special character.
## Langflow connection issues
Verify the `LANGFLOW_SUPERUSER` credentials are correct.
## Memory errors
### Container out of memory errors
Increase Docker memory allocation or use [docker-compose-cpu.yml](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) to deploy OpenRAG.
### Podman on macOS memory issues
If you're using Podman on macOS, you may need to increase VM memory on your Podman machine.
This example increases the machine size to 8 GB of RAM, which should be sufficient to run OpenRAG.
```bash
podman machine stop
podman machine rm
podman machine init --memory 8192 # 8 GB example
podman machine start
```
## Port conflicts
Ensure ports 3000, 7860, 8000, 9200, and 5601 are available.
## Langflow container already exists
If you are running other versions of Langflow containers on your machine, you may encounter an issue where Docker or Podman thinks Langflow is already up.
Remove just the problem container, or clean up all containers and start fresh.
To reset your local containers and pull new images, do the following:
1. Stop your containers and completely remove them.
<Tabs groupId="Container software">
<TabItem value="Docker" label="Docker" default>
```bash
# Stop all running containers
docker stop $(docker ps -q)
# Remove all containers (including stopped ones)
docker rm --force $(docker ps -aq)
# Remove all images
docker rmi --force $(docker images -q)
# Remove all volumes
docker volume prune --force
# Remove all networks (except default)
docker network prune --force
# Clean up any leftover data
docker system prune --all --force --volumes
```
</TabItem>
<TabItem value="Podman" label="Podman">
```bash
# Stop all running containers
podman stop --all
# Remove all containers (including stopped ones)
podman rm --all --force
# Remove all images
podman rmi --all --force
# Remove all volumes
podman volume prune --force
# Remove all networks (except default)
podman network prune --force
# Clean up any leftover data
podman system prune --all --force --volumes
```
</TabItem>
</Tabs>
2. Restart OpenRAG and upgrade to get the latest images for your containers.
```bash
uv run openrag
```
3. In the OpenRAG TUI, click **Status**, and then click **Upgrade**.
When the **Close** button is active, the upgrade is complete.
Close the window and open the OpenRAG application.

View file

@ -76,12 +76,12 @@ const sidebars = {
},
{
type: "category",
label: "Reference",
label: "Support",
items: [
{
type: "doc",
id: "reference/troubleshooting",
label: "Troubleshooting"
id: "support/troubleshoot",
label: "Troubleshoot"
},
],
},

View file

@ -11,7 +11,7 @@ const Switch = React.forwardRef<
>(({ className, ...props }, ref) => (
<SwitchPrimitives.Root
className={cn(
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-muted",
className
)}
{...props}
@ -19,7 +19,7 @@ const Switch = React.forwardRef<
>
<SwitchPrimitives.Thumb
className={cn(
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0 data-[state=unchecked]:bg-primary"
)}
/>
</SwitchPrimitives.Root>

View file

@ -8,7 +8,9 @@ interface UpdateFlowSettingVariables {
llm_model?: string;
system_prompt?: string;
embedding_model?: string;
doclingPresets?: string;
table_structure?: boolean;
ocr?: boolean;
picture_descriptions?: boolean;
chunk_size?: number;
chunk_overlap?: number;
}

View file

@ -13,7 +13,9 @@ export interface KnowledgeSettings {
embedding_model?: string;
chunk_size?: number;
chunk_overlap?: number;
doclingPresets?: string;
table_structure?: boolean;
ocr?: boolean;
picture_descriptions?: boolean;
}
export interface Settings {

View file

@ -22,9 +22,9 @@ import {
CardTitle,
} from "@/components/ui/card";
import { Checkbox } from "@/components/ui/checkbox";
import { Switch } from "@/components/ui/switch";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import {
Select,
SelectContent,
@ -39,11 +39,6 @@ import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from
import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers";
import { ModelSelectItems } from "./helpers/model-select-item";
import { LabelWrapper } from "@/components/label-wrapper";
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from "@radix-ui/react-tooltip";
const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS;
@ -112,7 +107,9 @@ function KnowledgeSourcesPage() {
const [systemPrompt, setSystemPrompt] = useState<string>("");
const [chunkSize, setChunkSize] = useState<number>(1024);
const [chunkOverlap, setChunkOverlap] = useState<number>(50);
const [processingMode, setProcessingMode] = useState<string>("standard");
const [tableStructure, setTableStructure] = useState<boolean>(false);
const [ocr, setOcr] = useState<boolean>(false);
const [pictureDescriptions, setPictureDescriptions] = useState<boolean>(false);
// Fetch settings using React Query
const { data: settings = {} } = useGetSettingsQuery({
@ -195,12 +192,24 @@ function KnowledgeSourcesPage() {
}
}, [settings.knowledge?.chunk_overlap]);
// Sync processing mode with settings data
// Sync docling settings with settings data
useEffect(() => {
if (settings.knowledge?.doclingPresets) {
setProcessingMode(settings.knowledge.doclingPresets);
if (settings.knowledge?.table_structure !== undefined) {
setTableStructure(settings.knowledge.table_structure);
}
}, [settings.knowledge?.doclingPresets]);
}, [settings.knowledge?.table_structure]);
useEffect(() => {
  // Copy the OCR flag from the fetched settings into local state.
  // While the settings carry no OCR value, leave the local state untouched.
  const savedOcr = settings.knowledge?.ocr;
  if (savedOcr === undefined) return;
  setOcr(savedOcr);
}, [settings.knowledge?.ocr]);
useEffect(() => {
  // Copy the picture-descriptions flag from the fetched settings into local state.
  // While the settings carry no value for it, leave the local state untouched.
  const savedPictureDescriptions = settings.knowledge?.picture_descriptions;
  if (savedPictureDescriptions === undefined) return;
  setPictureDescriptions(savedPictureDescriptions);
}, [settings.knowledge?.picture_descriptions]);
// Update model selection immediately
const handleModelChange = (newModel: string) => {
@ -231,11 +240,20 @@ function KnowledgeSourcesPage() {
debouncedUpdate({ chunk_overlap: numValue });
};
// Update processing mode
const handleProcessingModeChange = (mode: string) => {
setProcessingMode(mode);
// Update the configuration setting (backend will also update the flow automatically)
debouncedUpdate({ doclingPresets: mode });
// Update docling settings
/**
 * Handles the "Table Structure" switch: stores the new value in local state
 * first, then sends it through the flow-setting mutation (no debounce).
 */
const handleTableStructureChange = (enabled: boolean) => {
  setTableStructure(enabled);
  updateFlowSettingMutation.mutate({ table_structure: enabled });
};
/**
 * Handles the "OCR" switch: stores the new value in local state first,
 * then sends it through the flow-setting mutation (no debounce).
 */
const handleOcrChange = (enabled: boolean) => {
  setOcr(enabled);
  updateFlowSettingMutation.mutate({ ocr: enabled });
};
/**
 * Handles the "Picture Descriptions" switch: stores the new value in local
 * state first, then sends it through the flow-setting mutation (no debounce).
 */
const handlePictureDescriptionsChange = (enabled: boolean) => {
  setPictureDescriptions(enabled);
  updateFlowSettingMutation.mutate({ picture_descriptions: enabled });
};
// Helper function to get connector icon
@ -569,7 +587,9 @@ function KnowledgeSourcesPage() {
// Only reset form values if the API call was successful
setChunkSize(DEFAULT_KNOWLEDGE_SETTINGS.chunk_size);
setChunkOverlap(DEFAULT_KNOWLEDGE_SETTINGS.chunk_overlap);
setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode);
setTableStructure(false);
setOcr(false);
setPictureDescriptions(false);
closeDialog(); // Close after successful completion
})
.catch((error) => {
@ -1063,76 +1083,61 @@ function KnowledgeSourcesPage() {
</div>
</div>
</div>
<div className="space-y-3">
<Label className="text-base font-medium">Ingestion presets</Label>
<RadioGroup
value={processingMode}
onValueChange={handleProcessingModeChange}
className="space-y-3"
>
<div className="flex items-center space-x-3">
<RadioGroupItem value="standard" id="standard" />
<div className="flex-1">
<Label
htmlFor="standard"
className="text-base font-medium cursor-pointer"
>
No OCR
</Label>
<div className="text-sm text-muted-foreground">
Fast ingest for documents with selectable text. Images are
ignored.
</div>
<div className="">
<div className="flex items-center justify-between py-3 border-b border-border">
<div className="flex-1">
<Label
htmlFor="table-structure"
className="text-base font-medium cursor-pointer pb-3"
>
Table Structure
</Label>
<div className="text-sm text-muted-foreground">
Capture table structure during ingest.
</div>
</div>
<div className="flex items-center space-x-3">
<RadioGroupItem value="ocr" id="ocr" />
<div className="flex-1">
<Label
htmlFor="ocr"
className="text-base font-medium cursor-pointer"
>
OCR
</Label>
<div className="text-sm text-muted-foreground">
Extracts text from images and scanned pages.
</div>
<Switch
id="table-structure"
checked={tableStructure}
onCheckedChange={handleTableStructureChange}
/>
</div>
<div className="flex items-center justify-between py-3 border-b border-border">
<div className="flex-1">
<Label
htmlFor="ocr"
className="text-base font-medium cursor-pointer pb-3"
>
OCR
</Label>
<div className="text-sm text-muted-foreground">
Extracts text from images/PDFs. Ingest is slower when enabled.
</div>
</div>
<div className="flex items-center space-x-3">
<RadioGroupItem
value="picture_description"
id="picture_description"
/>
<div className="flex-1">
<Label
htmlFor="picture_description"
className="text-base font-medium cursor-pointer"
>
OCR + Captions
</Label>
<div className="text-sm text-muted-foreground">
Extracts text from images and scanned pages. Generates
short image captions.
</div>
<Switch
id="ocr"
checked={ocr}
onCheckedChange={handleOcrChange}
/>
</div>
<div className="flex items-center justify-between py-3">
<div className="flex-1">
<Label
htmlFor="picture-descriptions"
className="text-base font-medium cursor-pointer pb-3"
>
Picture Descriptions
</Label>
<div className="text-sm text-muted-foreground">
Adds captions for images. Ingest is slower when enabled.
</div>
</div>
<div className="flex items-center space-x-3">
<RadioGroupItem value="VLM" id="VLM" />
<div className="flex-1">
<Label
htmlFor="VLM"
className="text-base font-medium cursor-pointer"
>
VLM
</Label>
<div className="text-sm text-muted-foreground">
Extracts text from layout-aware parsing of text, tables,
and sections.
</div>
</div>
</div>
</RadioGroup>
<Switch
id="picture-descriptions"
checked={pictureDescriptions}
onCheckedChange={handlePictureDescriptionsChange}
/>
</div>
</div>
</div>
</CardContent>

View file

@ -1,29 +0,0 @@
"use client"
import * as React from "react"
import * as SwitchPrimitives from "@radix-ui/react-switch"
import { cn } from "@/lib/utils"
const Switch = React.forwardRef<
React.ElementRef<typeof SwitchPrimitives.Root>,
React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
>(({ className, ...props }, ref) => (
<SwitchPrimitives.Root
className={cn(
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-primary data-[state=unchecked]:bg-input",
className
)}
{...props}
ref={ref}
>
<SwitchPrimitives.Thumb
className={cn(
"pointer-events-none block h-5 w-5 rounded-full bg-background shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0"
)}
/>
</SwitchPrimitives.Root>
))
Switch.displayName = SwitchPrimitives.Root.displayName
export { Switch }

View file

@ -12,7 +12,9 @@ export const DEFAULT_AGENT_SETTINGS = {
export const DEFAULT_KNOWLEDGE_SETTINGS = {
chunk_size: 1000,
chunk_overlap: 200,
processing_mode: "standard"
table_structure: false,
ocr: false,
picture_descriptions: false
} as const;
/**

View file

@ -17,35 +17,30 @@ logger = get_logger(__name__)
# Docling preset configurations
def get_docling_preset_configs():
"""Get docling preset configurations with platform-specific settings"""
def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False):
"""Get docling preset configurations based on toggle settings
Args:
table_structure: Enable table structure parsing (default: False)
ocr: Enable OCR for text extraction from images (default: False)
picture_descriptions: Enable picture descriptions/captions (default: False)
"""
is_macos = platform.system() == "Darwin"
return {
"standard": {"do_ocr": False},
"ocr": {"do_ocr": True, "ocr_engine": "ocrmac" if is_macos else "easyocr"},
"picture_description": {
"do_ocr": True,
"ocr_engine": "ocrmac" if is_macos else "easyocr",
"do_picture_classification": True,
"do_picture_description": True,
"picture_description_local": {
"repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
"prompt": "Describe this image in a few sentences.",
},
},
"VLM": {
"pipeline": "vlm",
"vlm_pipeline_model_local": {
"repo_id": "ds4sd/SmolDocling-256M-preview-mlx-bf16"
if is_macos
else "ds4sd/SmolDocling-256M-preview",
"response_format": "doctags",
"inference_framework": "mlx",
},
},
config = {
"do_ocr": ocr,
"ocr_engine": "ocrmac" if is_macos else "easyocr",
"do_table_structure": table_structure,
"do_picture_classification": picture_descriptions,
"do_picture_description": picture_descriptions,
"picture_description_local": {
"repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
"prompt": "Describe this image in a few sentences.",
}
}
return config
async def get_settings(request, session_manager):
"""Get application settings"""
@ -71,7 +66,9 @@ async def get_settings(request, session_manager):
"embedding_model": knowledge_config.embedding_model,
"chunk_size": knowledge_config.chunk_size,
"chunk_overlap": knowledge_config.chunk_overlap,
"doclingPresets": knowledge_config.doclingPresets,
"table_structure": knowledge_config.table_structure,
"ocr": knowledge_config.ocr,
"picture_descriptions": knowledge_config.picture_descriptions,
},
"agent": {
"llm_model": agent_config.llm_model,
@ -178,7 +175,9 @@ async def update_settings(request, session_manager):
"system_prompt",
"chunk_size",
"chunk_overlap",
"doclingPresets",
"table_structure",
"ocr",
"picture_descriptions",
"embedding_model",
}
@ -255,32 +254,68 @@ async def update_settings(request, session_manager):
# Don't fail the entire settings update if flow update fails
# The config will still be saved
if "doclingPresets" in body:
preset_configs = get_docling_preset_configs()
valid_presets = list(preset_configs.keys())
if body["doclingPresets"] not in valid_presets:
if "table_structure" in body:
if not isinstance(body["table_structure"], bool):
return JSONResponse(
{
"error": f"doclingPresets must be one of: {', '.join(valid_presets)}"
},
status_code=400,
{"error": "table_structure must be a boolean"}, status_code=400
)
current_config.knowledge.doclingPresets = body["doclingPresets"]
current_config.knowledge.table_structure = body["table_structure"]
config_updated = True
# Also update the flow with the new docling preset
# Also update the flow with the new docling settings
try:
flows_service = _get_flows_service()
await flows_service.update_flow_docling_preset(
body["doclingPresets"], preset_configs[body["doclingPresets"]]
)
logger.info(
f"Successfully updated docling preset in flow to '{body['doclingPresets']}'"
preset_config = get_docling_preset_configs(
table_structure=body["table_structure"],
ocr=current_config.knowledge.ocr,
picture_descriptions=current_config.knowledge.picture_descriptions
)
await flows_service.update_flow_docling_preset("custom", preset_config)
logger.info(f"Successfully updated table_structure setting in flow")
except Exception as e:
logger.error(f"Failed to update docling preset in flow: {str(e)}")
# Don't fail the entire settings update if flow update fails
# The config will still be saved
logger.error(f"Failed to update docling settings in flow: {str(e)}")
if "ocr" in body:
if not isinstance(body["ocr"], bool):
return JSONResponse(
{"error": "ocr must be a boolean"}, status_code=400
)
current_config.knowledge.ocr = body["ocr"]
config_updated = True
# Also update the flow with the new docling settings
try:
flows_service = _get_flows_service()
preset_config = get_docling_preset_configs(
table_structure=current_config.knowledge.table_structure,
ocr=body["ocr"],
picture_descriptions=current_config.knowledge.picture_descriptions
)
await flows_service.update_flow_docling_preset("custom", preset_config)
logger.info(f"Successfully updated ocr setting in flow")
except Exception as e:
logger.error(f"Failed to update docling settings in flow: {str(e)}")
if "picture_descriptions" in body:
if not isinstance(body["picture_descriptions"], bool):
return JSONResponse(
{"error": "picture_descriptions must be a boolean"}, status_code=400
)
current_config.knowledge.picture_descriptions = body["picture_descriptions"]
config_updated = True
# Also update the flow with the new docling settings
try:
flows_service = _get_flows_service()
preset_config = get_docling_preset_configs(
table_structure=current_config.knowledge.table_structure,
ocr=current_config.knowledge.ocr,
picture_descriptions=body["picture_descriptions"]
)
await flows_service.update_flow_docling_preset("custom", preset_config)
logger.info(f"Successfully updated picture_descriptions setting in flow")
except Exception as e:
logger.error(f"Failed to update docling settings in flow: {str(e)}")
if "chunk_size" in body:
if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0:
@ -624,48 +659,56 @@ def _get_flows_service():
async def update_docling_preset(request, session_manager):
"""Update docling preset in the ingest flow"""
"""Update docling settings in the ingest flow - deprecated endpoint, use /settings instead"""
try:
# Parse request body
body = await request.json()
# Validate preset parameter
if "preset" not in body:
return JSONResponse(
{"error": "preset parameter is required"}, status_code=400
)
# Support old preset-based API for backwards compatibility
if "preset" in body:
# Map old presets to new toggle settings
preset_map = {
"standard": {"table_structure": False, "ocr": False, "picture_descriptions": False},
"ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False},
"picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True},
"VLM": {"table_structure": False, "ocr": False, "picture_descriptions": False},
}
preset = body["preset"]
preset_configs = get_docling_preset_configs()
preset = body["preset"]
if preset not in preset_map:
return JSONResponse(
{"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_map.keys())}"},
status_code=400,
)
if preset not in preset_configs:
valid_presets = list(preset_configs.keys())
return JSONResponse(
{
"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}"
},
status_code=400,
)
settings = preset_map[preset]
else:
# Support new toggle-based API
settings = {
"table_structure": body.get("table_structure", False),
"ocr": body.get("ocr", False),
"picture_descriptions": body.get("picture_descriptions", False),
}
# Get the preset configuration
preset_config = preset_configs[preset]
preset_config = get_docling_preset_configs(**settings)
# Use the helper function to update the flow
flows_service = _get_flows_service()
await flows_service.update_flow_docling_preset(preset, preset_config)
await flows_service.update_flow_docling_preset("custom", preset_config)
logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow")
logger.info(f"Successfully updated docling settings in ingest flow")
return JSONResponse(
{
"message": f"Successfully updated docling preset to '{preset}'",
"preset": preset,
"message": f"Successfully updated docling settings",
"settings": settings,
"preset_config": preset_config,
}
)
except Exception as e:
logger.error("Failed to update docling preset", error=str(e))
logger.error("Failed to update docling settings", error=str(e))
return JSONResponse(
{"error": f"Failed to update docling preset: {str(e)}"}, status_code=500
{"error": f"Failed to update docling settings: {str(e)}"}, status_code=500
)

View file

@ -27,7 +27,9 @@ class KnowledgeConfig:
embedding_model: str = "text-embedding-3-small"
chunk_size: int = 1000
chunk_overlap: int = 200
doclingPresets: str = "standard"
table_structure: bool = False
ocr: bool = False
picture_descriptions: bool = False
@dataclass