diff --git a/README.md b/README.md index d79011a0..df1d6451 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ podman machine start ### Common Issues -See common issues and fixes: [docs/reference/troubleshooting.mdx](docs/docs/reference/troubleshooting.mdx) +See common issues and fixes: [docs/support/troubleshoot.mdx](docs/docs/support/troubleshoot.mdx) diff --git a/docs/docs/reference/troubleshooting.mdx b/docs/docs/reference/troubleshooting.mdx deleted file mode 100644 index c1893ef5..00000000 --- a/docs/docs/reference/troubleshooting.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: Troubleshooting -slug: /reference/troubleshooting ---- - -# Troubleshooting - -## Podman on macOS - -If using Podman on macOS, you may need to increase VM memory: - -```bash -podman machine stop -podman machine rm -podman machine init --memory 8192 # 8 GB example -podman machine start -``` - -## Common Issues - -1. OpenSearch fails to start: Check that `OPENSEARCH_PASSWORD` is set and meets requirements -2. Langflow connection issues: Verify `LANGFLOW_SUPERUSER` credentials are correct -3. Out of memory errors: Increase Docker memory allocation or use CPU-only mode -4. Port conflicts: Ensure ports 3000, 7860, 8000, 9200, 5601 are available diff --git a/docs/docs/support/troubleshoot.mdx b/docs/docs/support/troubleshoot.mdx new file mode 100644 index 00000000..57dcb4d3 --- /dev/null +++ b/docs/docs/support/troubleshoot.mdx @@ -0,0 +1,107 @@ +--- +title: Troubleshoot +slug: /support/troubleshoot +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +This page provides troubleshooting advice for issues you might encounter when using OpenRAG or contributing to OpenRAG. + +## OpenSearch fails to start + +Check that `OPENSEARCH_PASSWORD` is set and meets requirements. +The password must contain at least 8 characters, and must contain at least one uppercase letter, one lowercase letter, one digit, and one special character. 
+ +## Langflow connection issues + +Verify the `LANGFLOW_SUPERUSER` credentials are correct. + +## Memory errors + +### Container out of memory errors + +Increase Docker memory allocation or use [docker-compose-cpu.yml](https://github.com/langflow-ai/openrag/blob/main/docker-compose-cpu.yml) to deploy OpenRAG. + +### Podman on macOS memory issues + +If you're using Podman on macOS, you may need to increase VM memory on your Podman machine. +This example increases the machine size to 8 GB of RAM, which should be sufficient to run OpenRAG. + ```bash + podman machine stop + podman machine rm + podman machine init --memory 8192 # 8 GB example + podman machine start + ``` + +## Port conflicts + +Ensure ports 3000, 7860, 8000, 9200, 5601 are available. + +## Langflow container already exists + +If you are running other versions of Langflow containers on your machine, you may encounter an issue where Docker or Podman thinks Langflow is already up. + +Remove just the problem container, or clean up all containers and start fresh. + +To reset your local containers and pull new images, do the following: + +1. Stop your containers and completely remove them. 
+ + + + + ```bash + # Stop all running containers + docker stop $(docker ps -q) + + # Remove all containers (including stopped ones) + docker rm --force $(docker ps -aq) + + # Remove all images + docker rmi --force $(docker images -q) + + # Remove all volumes + docker volume prune --force + + # Remove all networks (except default) + docker network prune --force + + # Clean up any leftover data + docker system prune --all --force --volumes + ``` + + + + + ```bash + # Stop all running containers + podman stop --all + + # Remove all containers (including stopped ones) + podman rm --all --force + + # Remove all images + podman rmi --all --force + + # Remove all volumes + podman volume prune --force + + # Remove all networks (except default) + podman network prune --force + + # Clean up any leftover data + podman system prune --all --force --volumes + ``` + + + + +2. Restart OpenRAG and upgrade to get the latest images for your containers. + ```bash + uv run openrag + ``` + +3. In the OpenRAG TUI, click **Status**, and then click **Upgrade**. +When the **Close** button is active, the upgrade is complete. +Close the window and open the OpenRAG application. 
diff --git a/docs/sidebars.js b/docs/sidebars.js index 3048cb70..aa37e2b5 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -76,12 +76,12 @@ const sidebars = { }, { type: "category", - label: "Reference", + label: "Support", items: [ { type: "doc", - id: "reference/troubleshooting", - label: "Troubleshooting" + id: "support/troubleshoot", + label: "Troubleshoot" }, ], }, diff --git a/frontend/components/ui/switch.tsx b/frontend/components/ui/switch.tsx index 12187e8f..9d78fcfd 100644 --- a/frontend/components/ui/switch.tsx +++ b/frontend/components/ui/switch.tsx @@ -11,7 +11,7 @@ const Switch = React.forwardRef< >(({ className, ...props }, ref) => ( diff --git a/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts b/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts index 5f196ebd..e789af48 100644 --- a/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts +++ b/frontend/src/app/api/mutations/useUpdateFlowSettingMutation.ts @@ -8,7 +8,9 @@ interface UpdateFlowSettingVariables { llm_model?: string; system_prompt?: string; embedding_model?: string; - doclingPresets?: string; + table_structure?: boolean; + ocr?: boolean; + picture_descriptions?: boolean; chunk_size?: number; chunk_overlap?: number; } diff --git a/frontend/src/app/api/queries/useGetSettingsQuery.ts b/frontend/src/app/api/queries/useGetSettingsQuery.ts index cf1b4ec2..d2d5a15d 100644 --- a/frontend/src/app/api/queries/useGetSettingsQuery.ts +++ b/frontend/src/app/api/queries/useGetSettingsQuery.ts @@ -13,7 +13,9 @@ export interface KnowledgeSettings { embedding_model?: string; chunk_size?: number; chunk_overlap?: number; - doclingPresets?: string; + table_structure?: boolean; + ocr?: boolean; + picture_descriptions?: boolean; } export interface Settings { diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index a63d91d3..cc439f53 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -22,9 +22,9 
@@ import { CardTitle, } from "@/components/ui/card"; import { Checkbox } from "@/components/ui/checkbox"; +import { Switch } from "@/components/ui/switch"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; -import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group"; import { Select, SelectContent, @@ -39,11 +39,6 @@ import { DEFAULT_AGENT_SETTINGS, DEFAULT_KNOWLEDGE_SETTINGS, UI_CONSTANTS } from import { getFallbackModels, type ModelProvider } from "./helpers/model-helpers"; import { ModelSelectItems } from "./helpers/model-select-item"; import { LabelWrapper } from "@/components/label-wrapper"; -import { - Tooltip, - TooltipContent, - TooltipTrigger, -} from "@radix-ui/react-tooltip"; const { MAX_SYSTEM_PROMPT_CHARS } = UI_CONSTANTS; @@ -112,7 +107,9 @@ function KnowledgeSourcesPage() { const [systemPrompt, setSystemPrompt] = useState(""); const [chunkSize, setChunkSize] = useState(1024); const [chunkOverlap, setChunkOverlap] = useState(50); - const [processingMode, setProcessingMode] = useState("standard"); + const [tableStructure, setTableStructure] = useState(false); + const [ocr, setOcr] = useState(false); + const [pictureDescriptions, setPictureDescriptions] = useState(false); // Fetch settings using React Query const { data: settings = {} } = useGetSettingsQuery({ @@ -195,12 +192,24 @@ function KnowledgeSourcesPage() { } }, [settings.knowledge?.chunk_overlap]); - // Sync processing mode with settings data + // Sync docling settings with settings data useEffect(() => { - if (settings.knowledge?.doclingPresets) { - setProcessingMode(settings.knowledge.doclingPresets); + if (settings.knowledge?.table_structure !== undefined) { + setTableStructure(settings.knowledge.table_structure); } - }, [settings.knowledge?.doclingPresets]); + }, [settings.knowledge?.table_structure]); + + useEffect(() => { + if (settings.knowledge?.ocr !== undefined) { + setOcr(settings.knowledge.ocr); + } + }, 
[settings.knowledge?.ocr]); + + useEffect(() => { + if (settings.knowledge?.picture_descriptions !== undefined) { + setPictureDescriptions(settings.knowledge.picture_descriptions); + } + }, [settings.knowledge?.picture_descriptions]); // Update model selection immediately const handleModelChange = (newModel: string) => { @@ -231,11 +240,20 @@ function KnowledgeSourcesPage() { debouncedUpdate({ chunk_overlap: numValue }); }; - // Update processing mode - const handleProcessingModeChange = (mode: string) => { - setProcessingMode(mode); - // Update the configuration setting (backend will also update the flow automatically) - debouncedUpdate({ doclingPresets: mode }); + // Update docling settings + const handleTableStructureChange = (checked: boolean) => { + setTableStructure(checked); + updateFlowSettingMutation.mutate({ table_structure: checked }); + }; + + const handleOcrChange = (checked: boolean) => { + setOcr(checked); + updateFlowSettingMutation.mutate({ ocr: checked }); + }; + + const handlePictureDescriptionsChange = (checked: boolean) => { + setPictureDescriptions(checked); + updateFlowSettingMutation.mutate({ picture_descriptions: checked }); }; // Helper function to get connector icon @@ -569,7 +587,9 @@ function KnowledgeSourcesPage() { // Only reset form values if the API call was successful setChunkSize(DEFAULT_KNOWLEDGE_SETTINGS.chunk_size); setChunkOverlap(DEFAULT_KNOWLEDGE_SETTINGS.chunk_overlap); - setProcessingMode(DEFAULT_KNOWLEDGE_SETTINGS.processing_mode); + setTableStructure(false); + setOcr(false); + setPictureDescriptions(false); closeDialog(); // Close after successful completion }) .catch((error) => { @@ -1063,76 +1083,61 @@ function KnowledgeSourcesPage() { -
- - -
- -
- -
- Fast ingest for documents with selectable text. Images are - ignored. -
+
+
+
+ +
+ Capture table structure during ingest.
-
- -
- -
- Extracts text from images and scanned pages. -
+ +
+
+
+ +
+ Extracts text from images/PDFs. Ingest is slower when enabled.
-
- -
- -
- Extracts text from images and scanned pages. Generates - short image captions. -
+ +
+
+
+ +
+ Adds captions for images. Ingest is slower when enabled.
-
- -
- -
- Extracts text from layout-aware parsing of text, tables, - and sections. -
-
-
- + +
diff --git a/frontend/src/components/ui/switch.tsx b/frontend/src/components/ui/switch.tsx deleted file mode 100644 index b7f4d8a1..00000000 --- a/frontend/src/components/ui/switch.tsx +++ /dev/null @@ -1,29 +0,0 @@ -"use client" - -import * as React from "react" -import * as SwitchPrimitives from "@radix-ui/react-switch" - -import { cn } from "@/lib/utils" - -const Switch = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => ( - - - -)) -Switch.displayName = SwitchPrimitives.Root.displayName - -export { Switch } \ No newline at end of file diff --git a/frontend/src/lib/constants.ts b/frontend/src/lib/constants.ts index 9c6ea7b0..8e7770fb 100644 --- a/frontend/src/lib/constants.ts +++ b/frontend/src/lib/constants.ts @@ -12,7 +12,9 @@ export const DEFAULT_AGENT_SETTINGS = { export const DEFAULT_KNOWLEDGE_SETTINGS = { chunk_size: 1000, chunk_overlap: 200, - processing_mode: "standard" + table_structure: false, + ocr: false, + picture_descriptions: false } as const; /** diff --git a/src/api/settings.py b/src/api/settings.py index c2c7cbd0..a99cce61 100644 --- a/src/api/settings.py +++ b/src/api/settings.py @@ -17,35 +17,30 @@ logger = get_logger(__name__) # Docling preset configurations -def get_docling_preset_configs(): - """Get docling preset configurations with platform-specific settings""" +def get_docling_preset_configs(table_structure=False, ocr=False, picture_descriptions=False): + """Get docling preset configurations based on toggle settings + + Args: + table_structure: Enable table structure parsing (default: False) + ocr: Enable OCR for text extraction from images (default: False) + picture_descriptions: Enable picture descriptions/captions (default: False) + """ is_macos = platform.system() == "Darwin" - return { - "standard": {"do_ocr": False}, - "ocr": {"do_ocr": True, "ocr_engine": "ocrmac" if is_macos else "easyocr"}, - "picture_description": { - "do_ocr": True, - "ocr_engine": "ocrmac" if is_macos 
else "easyocr", - "do_picture_classification": True, - "do_picture_description": True, - "picture_description_local": { - "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct", - "prompt": "Describe this image in a few sentences.", - }, - }, - "VLM": { - "pipeline": "vlm", - "vlm_pipeline_model_local": { - "repo_id": "ds4sd/SmolDocling-256M-preview-mlx-bf16" - if is_macos - else "ds4sd/SmolDocling-256M-preview", - "response_format": "doctags", - "inference_framework": "mlx", - }, - }, + config = { + "do_ocr": ocr, + "ocr_engine": "ocrmac" if is_macos else "easyocr", + "do_table_structure": table_structure, + "do_picture_classification": picture_descriptions, + "do_picture_description": picture_descriptions, + "picture_description_local": { + "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct", + "prompt": "Describe this image in a few sentences.", + } } + return config + async def get_settings(request, session_manager): """Get application settings""" @@ -71,7 +66,9 @@ async def get_settings(request, session_manager): "embedding_model": knowledge_config.embedding_model, "chunk_size": knowledge_config.chunk_size, "chunk_overlap": knowledge_config.chunk_overlap, - "doclingPresets": knowledge_config.doclingPresets, + "table_structure": knowledge_config.table_structure, + "ocr": knowledge_config.ocr, + "picture_descriptions": knowledge_config.picture_descriptions, }, "agent": { "llm_model": agent_config.llm_model, @@ -178,7 +175,9 @@ async def update_settings(request, session_manager): "system_prompt", "chunk_size", "chunk_overlap", - "doclingPresets", + "table_structure", + "ocr", + "picture_descriptions", "embedding_model", } @@ -255,32 +254,68 @@ async def update_settings(request, session_manager): # Don't fail the entire settings update if flow update fails # The config will still be saved - if "doclingPresets" in body: - preset_configs = get_docling_preset_configs() - valid_presets = list(preset_configs.keys()) - if body["doclingPresets"] not in valid_presets: + if 
"table_structure" in body: + if not isinstance(body["table_structure"], bool): return JSONResponse( - { - "error": f"doclingPresets must be one of: {', '.join(valid_presets)}" - }, - status_code=400, + {"error": "table_structure must be a boolean"}, status_code=400 ) - current_config.knowledge.doclingPresets = body["doclingPresets"] + current_config.knowledge.table_structure = body["table_structure"] config_updated = True - # Also update the flow with the new docling preset + # Also update the flow with the new docling settings try: flows_service = _get_flows_service() - await flows_service.update_flow_docling_preset( - body["doclingPresets"], preset_configs[body["doclingPresets"]] - ) - logger.info( - f"Successfully updated docling preset in flow to '{body['doclingPresets']}'" + preset_config = get_docling_preset_configs( + table_structure=body["table_structure"], + ocr=current_config.knowledge.ocr, + picture_descriptions=current_config.knowledge.picture_descriptions ) + await flows_service.update_flow_docling_preset("custom", preset_config) + logger.info(f"Successfully updated table_structure setting in flow") except Exception as e: - logger.error(f"Failed to update docling preset in flow: {str(e)}") - # Don't fail the entire settings update if flow update fails - # The config will still be saved + logger.error(f"Failed to update docling settings in flow: {str(e)}") + + if "ocr" in body: + if not isinstance(body["ocr"], bool): + return JSONResponse( + {"error": "ocr must be a boolean"}, status_code=400 + ) + current_config.knowledge.ocr = body["ocr"] + config_updated = True + + # Also update the flow with the new docling settings + try: + flows_service = _get_flows_service() + preset_config = get_docling_preset_configs( + table_structure=current_config.knowledge.table_structure, + ocr=body["ocr"], + picture_descriptions=current_config.knowledge.picture_descriptions + ) + await flows_service.update_flow_docling_preset("custom", preset_config) + 
logger.info(f"Successfully updated ocr setting in flow") + except Exception as e: + logger.error(f"Failed to update docling settings in flow: {str(e)}") + + if "picture_descriptions" in body: + if not isinstance(body["picture_descriptions"], bool): + return JSONResponse( + {"error": "picture_descriptions must be a boolean"}, status_code=400 + ) + current_config.knowledge.picture_descriptions = body["picture_descriptions"] + config_updated = True + + # Also update the flow with the new docling settings + try: + flows_service = _get_flows_service() + preset_config = get_docling_preset_configs( + table_structure=current_config.knowledge.table_structure, + ocr=current_config.knowledge.ocr, + picture_descriptions=body["picture_descriptions"] + ) + await flows_service.update_flow_docling_preset("custom", preset_config) + logger.info(f"Successfully updated picture_descriptions setting in flow") + except Exception as e: + logger.error(f"Failed to update docling settings in flow: {str(e)}") if "chunk_size" in body: if not isinstance(body["chunk_size"], int) or body["chunk_size"] <= 0: @@ -624,48 +659,56 @@ def _get_flows_service(): async def update_docling_preset(request, session_manager): - """Update docling preset in the ingest flow""" + """Update docling settings in the ingest flow - deprecated endpoint, use /settings instead""" try: # Parse request body body = await request.json() - # Validate preset parameter - if "preset" not in body: - return JSONResponse( - {"error": "preset parameter is required"}, status_code=400 - ) + # Support old preset-based API for backwards compatibility + if "preset" in body: + # Map old presets to new toggle settings + preset_map = { + "standard": {"table_structure": False, "ocr": False, "picture_descriptions": False}, + "ocr": {"table_structure": False, "ocr": True, "picture_descriptions": False}, + "picture_description": {"table_structure": False, "ocr": True, "picture_descriptions": True}, + "VLM": {"table_structure": False, "ocr": 
False, "picture_descriptions": False}, + } - preset = body["preset"] - preset_configs = get_docling_preset_configs() + preset = body["preset"] + if preset not in preset_map: + return JSONResponse( + {"error": f"Invalid preset '{preset}'. Valid presets: {', '.join(preset_map.keys())}"}, + status_code=400, + ) - if preset not in preset_configs: - valid_presets = list(preset_configs.keys()) - return JSONResponse( - { - "error": f"Invalid preset '{preset}'. Valid presets: {', '.join(valid_presets)}" - }, - status_code=400, - ) + settings = preset_map[preset] + else: + # Support new toggle-based API + settings = { + "table_structure": body.get("table_structure", False), + "ocr": body.get("ocr", False), + "picture_descriptions": body.get("picture_descriptions", False), + } # Get the preset configuration - preset_config = preset_configs[preset] + preset_config = get_docling_preset_configs(**settings) # Use the helper function to update the flow flows_service = _get_flows_service() - await flows_service.update_flow_docling_preset(preset, preset_config) + await flows_service.update_flow_docling_preset("custom", preset_config) - logger.info(f"Successfully updated docling preset to '{preset}' in ingest flow") + logger.info(f"Successfully updated docling settings in ingest flow") return JSONResponse( { - "message": f"Successfully updated docling preset to '{preset}'", - "preset": preset, + "message": f"Successfully updated docling settings", + "settings": settings, "preset_config": preset_config, } ) except Exception as e: - logger.error("Failed to update docling preset", error=str(e)) + logger.error("Failed to update docling settings", error=str(e)) return JSONResponse( - {"error": f"Failed to update docling preset: {str(e)}"}, status_code=500 + {"error": f"Failed to update docling settings: {str(e)}"}, status_code=500 ) diff --git a/src/config/config_manager.py b/src/config/config_manager.py index 0b814470..6e891c5c 100644 --- a/src/config/config_manager.py +++ 
b/src/config/config_manager.py @@ -27,7 +27,9 @@ class KnowledgeConfig: embedding_model: str = "text-embedding-3-small" chunk_size: int = 1000 chunk_overlap: int = 200 - doclingPresets: str = "standard" + table_structure: bool = False + ocr: bool = False + picture_descriptions: bool = False @dataclass