From 911c2bc025ea9e43d153fbdcc155b51e284089c4 Mon Sep 17 00:00:00 2001 From: PentaFrame-Development Date: Thu, 11 Dec 2025 17:42:39 +0100 Subject: [PATCH] docs: improve Docs Generator documentation - Add key features section - Document all UI configuration options (logo, fonts, margins, watermark) - Fix output table to match actual UI outputs - Add comprehensive Unicode and multi-language support section - Add troubleshooting section - Clean up unused outputs in code --- agent/component/docs_generator.py | 7 -- .../docs_generator.md | 115 ++++++++++++++++-- 2 files changed, 104 insertions(+), 18 deletions(-) diff --git a/agent/component/docs_generator.py b/agent/component/docs_generator.py index 11de225b1..9c2442958 100644 --- a/agent/component/docs_generator.py +++ b/agent/component/docs_generator.py @@ -76,8 +76,6 @@ class PDFGeneratorParam(ComponentParamBase): self.outputs = { "file_path": {"value": "", "type": "string"}, - "filename": {"value": "", "type": "string"}, - "message": {"value": "", "type": "string"}, "pdf_base64": {"value": "", "type": "string"}, "download": {"value": "", "type": "string"}, "success": {"value": False, "type": "boolean"} @@ -375,7 +373,6 @@ class PDFGenerator(Message, ABC): # Set outputs self.set_output("file_path", file_path) self.set_output("pdf_base64", doc_base64) # Keep same output name for compatibility - self.set_output("filename", filename) self.set_output("success", True) # Create download info object @@ -390,10 +387,6 @@ class PDFGenerator(Message, ABC): download_json = json.dumps(download_info) self.set_output("download", download_json) - # Create a user-friendly download message - download_message = f"✓ {output_format.upper()} generated successfully: {filename}\nFile saved to: {file_path}\nSize: {file_size} bytes" - self.set_output("message", download_message) - return download_info except Exception as e: diff --git a/docs/guides/agent/agent_component_reference/docs_generator.md 
b/docs/guides/agent/agent_component_reference/docs_generator.md index 9834364a5..2fc0700f7 100644 --- a/docs/guides/agent/agent_component_reference/docs_generator.md +++ b/docs/guides/agent/agent_component_reference/docs_generator.md @@ -5,12 +5,21 @@ slug: /docs_generator # Docs Generator component -A component that generates downloadable PDF, DOCX, or TXT documents from markdown-style content. +A component that generates downloadable PDF, DOCX, or TXT documents from markdown-style content with full Unicode support. --- The **Docs Generator** component enables you to create professional documents directly within your agent workflow. It accepts markdown-formatted text and converts it into downloadable files, making it ideal for generating reports, summaries, or any structured document output. +## Key features + +- **Multiple output formats**: PDF, DOCX, and TXT +- **Full Unicode support**: Automatic font switching for CJK (Chinese, Japanese, Korean), Arabic, Hebrew, and other non-Latin scripts +- **Rich formatting**: Headers, lists, tables, code blocks, and more +- **Customizable styling**: Fonts, margins, page size, and orientation +- **Document extras**: Logo, watermark, page numbers, and timestamps +- **Direct download**: Generates a download button for the chat interface + ## Prerequisites - Content to be converted into a document (typically from an **Agent** or other text-generating component). @@ -63,6 +72,26 @@ The file format for the generated document: - **DOCX**: Microsoft Word format. - **TXT**: Plain text format. +### Logo image + +Optional. A logo image to display at the top of the document. 
You can either: + +- Upload an image file using the file picker +- Paste an image path, URL, or base64-encoded data + +### Logo position + +The horizontal position of the logo: + +- **left** (default) +- **center** +- **right** + +### Logo dimensions + +- **Logo width**: Width in inches (default: `2.0`) +- **Logo height**: Height in inches (default: `1.0`) + ### Font family The font used throughout the document: @@ -70,10 +99,8 @@ The font used throughout the document: - **Helvetica** (default) - **Times-Roman** - **Courier** - -:::tip NOTE -When the document contains CJK (Chinese, Japanese, Korean) or other non-Latin characters, the system automatically switches to a compatible Unicode font (STSong-Light) to ensure proper rendering. The selected font family is used for Latin-only content. -::: +- **Helvetica-Bold** +- **Times-Bold** ### Font size @@ -110,6 +137,10 @@ Page margins in inches: Optional. Custom filename for the generated document. If left empty, a filename is auto-generated with a timestamp. +### Output directory + +The server directory where generated documents are saved. Defaults to `/tmp/pdf_outputs`. + ### Add page numbers When enabled, page numbers are added to the footer of each page. Defaults to `true`. @@ -118,6 +149,10 @@ When enabled, page numbers are added to the footer of each page. Defaults to `tr When enabled, a generation timestamp is added to the document footer. Defaults to `true`. +### Watermark text + +Optional. Text to display as a diagonal watermark across each page. Useful for marking documents as "Draft", "Confidential", etc. + ## Output The **Docs Generator** component provides the following output variables: @@ -126,7 +161,7 @@ The **Docs Generator** component provides the following output variables: | ------------- | --------- | --------------------------------------------------------------------------- | | `file_path` | `string` | The server path where the generated document is saved. 
| | `pdf_base64` | `string` | The document content encoded in base64 format. | -| `download` | `string` | JSON containing download information. Reference this in a **Message** component to display a download button. | +| `download` | `string` | JSON containing download information for the chat interface. | | `success` | `boolean` | Indicates whether the document was generated successfully. | ### Displaying the download button @@ -137,12 +172,70 @@ To display a download button in the chat, add a **Message** component after the 2. In the **Message** component's content field, type `/` and select `{Docs Generator_0@download}`. 3. When the agent runs, a download button will appear in the chat, allowing users to download the generated document. -## Multi-language support +The download button automatically handles: +- File type detection (PDF, DOCX, TXT) +- Proper MIME type for browser downloads +- Base64 decoding for direct file delivery -The **Docs Generator** automatically detects non-Latin characters (Chinese, Japanese, Korean, Arabic, Hebrew, Cyrillic, etc.) and uses appropriate Unicode fonts when available on the server. +## Unicode and multi-language support + +The **Docs Generator** includes intelligent font handling for international content: + +### How it works + +1. **Content analysis**: The component scans the text for non-Latin characters. +2. **Automatic font switching**: When CJK or other complex scripts are detected, the system automatically switches to a compatible CID font (STSong-Light for Chinese, HeiseiMin-W3 for Japanese, HYSMyeongJo-Medium for Korean). +3. **Latin and similar scripts**: For documents containing only Latin (including extended Latin), Cyrillic, or Greek characters, the user-selected font family is used. 
+ +### Supported scripts + +| Script | Unicode Range | Font Used | +| ------ | ------------- | --------- | +| Chinese (CJK) | U+4E00–U+9FFF | STSong-Light | +| Japanese (Hiragana/Katakana) | U+3040–U+30FF | HeiseiMin-W3 | +| Korean (Hangul) | U+AC00–U+D7AF | HYSMyeongJo-Medium | +| Arabic | U+0600–U+06FF | CID font fallback | +| Hebrew | U+0590–U+05FF | CID font fallback | +| Devanagari (Hindi) | U+0900–U+097F | CID font fallback | +| Thai | U+0E00–U+0E7F | CID font fallback | + +### Font installation + +For full multi-language support in self-hosted deployments, ensure Unicode fonts are installed: + +**Linux (Debian/Ubuntu):** +```bash +apt-get install fonts-freefont-ttf fonts-noto-cjk +``` + +**Docker:** The official RAGFlow Docker image includes these fonts. For custom images, add the font packages to your Dockerfile: +```dockerfile +RUN apt-get update && apt-get install -y fonts-freefont-ttf fonts-noto-cjk +``` :::tip NOTE -For full multi-language support, ensure Unicode fonts are installed on the RAGFlow server: -- **Linux**: `fonts-freefont-ttf`, `fonts-noto-cjk`, or `fonts-droid-fallback` -- **Docker**: Add font packages to the Dockerfile if needed +CID fonts (STSong-Light, HeiseiMin-W3, etc.) are built into ReportLab and do not require additional installation. They are used automatically when CJK content is detected. ::: + +## Troubleshooting + +### Characters appear as boxes or question marks + +This indicates missing font support. Ensure that: +1. The content contains supported Unicode characters. +2. For self-hosted deployments, Unicode fonts are installed on the server. +3. The document is being viewed in a PDF reader that supports embedded fonts. + +### Download button not appearing + +Ensure that: +1. The **Message** component is connected after the **Docs Generator**. +2. The `download` variable is referenced in the **Message** component by typing `/` and selecting it (it appears as `{Docs Generator_0@download}`). +3. 
The document generation completed successfully (check that the `success` output is `true`). + +### Large tables not rendering correctly + +For tables with many columns or long cell content: +- The component automatically converts wide tables to a definition list format for better readability. +- Consider splitting large tables into multiple smaller tables. +- Use landscape orientation for wide tables.