feat: MinerU integration with environment-based configuration
- Add MinerU PDF parser support via external API or local installation
- Configure MinerU through environment variables (MINERU_APISERVER, etc.)
- Add per-dataset MinerU settings: language, formula/table recognition
- Add MinerU config form fields in Knowledge Base settings
- Remove MinerU from LLM factories (not a typical LLM model)
- Clean up unused ingestors tab from settings
This commit is contained in:
parent
74afb8d710
commit
035e8ced98
12 changed files with 310 additions and 40 deletions
|
|
@ -193,7 +193,14 @@ async def add_llm():
|
|||
api_key = apikey_json(["api_key", "provider_order"])
|
||||
|
||||
elif factory == "MinerU":
|
||||
api_key = apikey_json(["api_key", "provider_order"])
|
||||
api_key = apikey_json([
|
||||
"llm_name",
|
||||
"mineru_apiserver",
|
||||
"mineru_output_dir",
|
||||
"mineru_backend",
|
||||
"mineru_server_url",
|
||||
"mineru_delete_output",
|
||||
])
|
||||
|
||||
llm = {
|
||||
"tenant_id": current_user.id,
|
||||
|
|
|
|||
|
|
@ -5496,14 +5496,6 @@
|
|||
"model_type": "reranker"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MinerU",
|
||||
"logo": "",
|
||||
"tags": "OCR",
|
||||
"status": "1",
|
||||
"rank": "900",
|
||||
"llm": []
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -185,14 +185,16 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
return False, reason
|
||||
|
||||
def _run_mineru(
|
||||
self, input_path: Path, output_dir: Path, method: str = "auto", backend: str = "pipeline", lang: Optional[str] = None, server_url: Optional[str] = None, callback: Optional[Callable] = None
|
||||
self, input_path: Path, output_dir: Path, method: str = "auto", backend: str = "pipeline", lang: Optional[str] = None, server_url: Optional[str] = None, callback: Optional[Callable] = None,
|
||||
formula_enable: bool = True, table_enable: bool = True
|
||||
):
|
||||
if self.using_api:
|
||||
self._run_mineru_api(input_path, output_dir, method, backend, lang, callback)
|
||||
self._run_mineru_api(input_path, output_dir, method, backend, lang, callback, formula_enable, table_enable)
|
||||
else:
|
||||
self._run_mineru_executable(input_path, output_dir, method, backend, lang, server_url, callback)
|
||||
|
||||
def _run_mineru_api(self, input_path: Path, output_dir: Path, method: str = "auto", backend: str = "pipeline", lang: Optional[str] = None, callback: Optional[Callable] = None):
|
||||
def _run_mineru_api(self, input_path: Path, output_dir: Path, method: str = "auto", backend: str = "pipeline", lang: Optional[str] = None, callback: Optional[Callable] = None,
|
||||
formula_enable: bool = True, table_enable: bool = True):
|
||||
output_zip_path = os.path.join(str(output_dir), "output.zip")
|
||||
|
||||
pdf_file_path = str(input_path)
|
||||
|
|
@ -201,7 +203,9 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
raise RuntimeError(f"[MinerU] PDF file not exists: {pdf_file_path}")
|
||||
|
||||
pdf_file_name = Path(pdf_file_path).stem.strip()
|
||||
output_path = os.path.join(str(output_dir), pdf_file_name, method)
|
||||
# FIX: MinerU API outputs to 'vlm/' when using VLM backend, not 'auto/'
|
||||
output_subfolder = "vlm" if backend.startswith("vlm") else method
|
||||
output_path = os.path.join(str(output_dir), pdf_file_name, output_subfolder)
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
||||
files = {"files": (pdf_file_name + ".pdf", open(pdf_file_path, "rb"), "application/pdf")}
|
||||
|
|
@ -211,8 +215,8 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
"lang_list": lang,
|
||||
"backend": backend,
|
||||
"parse_method": method,
|
||||
"formula_enable": True,
|
||||
"table_enable": True,
|
||||
"formula_enable": formula_enable,
|
||||
"table_enable": table_enable,
|
||||
"server_url": None,
|
||||
"return_md": True,
|
||||
"return_middle_json": True,
|
||||
|
|
@ -224,6 +228,11 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
"end_page_id": 99999,
|
||||
}
|
||||
|
||||
# DEBUG: Log the exact request data being sent to MinerU
|
||||
self.logger.info(f"[MinerU DEBUG] Request URL: {self.mineru_api}/file_parse")
|
||||
self.logger.info(f"[MinerU DEBUG] Request data: {json.dumps(data, indent=2)}")
|
||||
self.logger.info(f"[MinerU DEBUG] File: {pdf_file_name}.pdf")
|
||||
|
||||
headers = {"Accept": "application/json"}
|
||||
try:
|
||||
self.logger.info(f"[MinerU] invoke api: {self.mineru_api}/file_parse")
|
||||
|
|
@ -581,6 +590,8 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
server_url: Optional[str] = None,
|
||||
delete_output: bool = True,
|
||||
parse_method: str = "raw",
|
||||
formula_enable: bool = True,
|
||||
table_enable: bool = True,
|
||||
) -> tuple:
|
||||
import shutil
|
||||
|
||||
|
|
@ -625,7 +636,8 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
self.__images__(pdf, zoomin=1)
|
||||
|
||||
try:
|
||||
self._run_mineru(pdf, out_dir, method=method, backend=backend, lang=lang, server_url=server_url, callback=callback)
|
||||
self._run_mineru(pdf, out_dir, method=method, backend=backend, lang=lang, server_url=server_url, callback=callback,
|
||||
formula_enable=formula_enable, table_enable=table_enable)
|
||||
outputs = self._read_output(out_dir, pdf.stem, method=method, backend=backend)
|
||||
self.logger.info(f"[MinerU] Parsed {len(outputs)} blocks from PDF.")
|
||||
if callback:
|
||||
|
|
|
|||
|
|
@ -39,6 +39,52 @@ from deepdoc.parser.tcadp_parser import TCADPParser
|
|||
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table, attach_media_context
|
||||
|
||||
|
||||
# MinerU OCR language mapping (RAGFlow language -> MinerU OCR code)
|
||||
# See: https://github.com/opendatalab/MinerU for supported languages
|
||||
MINERU_LANG_MAP = {
|
||||
"chinese": "ch",
|
||||
"english": "en",
|
||||
"russian": "cyrillic",
|
||||
"ukrainian": "cyrillic",
|
||||
"belarusian": "cyrillic",
|
||||
"bulgarian": "cyrillic",
|
||||
"serbian": "cyrillic",
|
||||
"korean": "korean",
|
||||
"japanese": "japan",
|
||||
"arabic": "arabic",
|
||||
"thai": "th",
|
||||
"greek": "el",
|
||||
"hindi": "devanagari",
|
||||
"tamil": "ta",
|
||||
"telugu": "te",
|
||||
"kannada": "ka",
|
||||
"georgian": "ka",
|
||||
"vietnamese": "latin",
|
||||
"french": "latin",
|
||||
"german": "latin",
|
||||
"spanish": "latin",
|
||||
"italian": "latin",
|
||||
"portuguese": "latin",
|
||||
"polish": "latin",
|
||||
"dutch": "latin",
|
||||
"turkish": "latin",
|
||||
}
|
||||
|
||||
|
||||
def _get_mineru_lang(lang: str) -> str:
|
||||
"""Convert RAGFlow language name to MinerU OCR language code.
|
||||
|
||||
Args:
|
||||
lang: RAGFlow language name (e.g., "Chinese", "Russian", "English")
|
||||
|
||||
Returns:
|
||||
MinerU OCR language code (e.g., "ch", "cyrillic", "en")
|
||||
"""
|
||||
if not lang:
|
||||
return "latin"
|
||||
return MINERU_LANG_MAP.get(lang.lower(), "latin")
|
||||
|
||||
|
||||
def by_deepdoc(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls = None ,**kwargs):
|
||||
callback = callback
|
||||
binary = binary
|
||||
|
|
@ -60,6 +106,15 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese"
|
|||
parse_method = kwargs.get("parse_method", "raw")
|
||||
mineru_llm_name = kwargs.get("mineru_llm_name")
|
||||
tenant_id = kwargs.get("tenant_id")
|
||||
|
||||
# Get MinerU-specific settings from parser_config
|
||||
parser_config = kwargs.get("parser_config", {})
|
||||
mineru_lang = parser_config.get("mineru_lang") or _get_mineru_lang(lang)
|
||||
formula_enable = parser_config.get("mineru_formula_enable", True)
|
||||
table_enable = parser_config.get("mineru_table_enable", True)
|
||||
|
||||
logging.info(f"[MinerU] by_mineru called with lang={lang}, parser_config mineru_lang={parser_config.get('mineru_lang')}, resolved mineru_lang={mineru_lang}")
|
||||
logging.info(f"[MinerU] formula_enable={formula_enable}, table_enable={table_enable}")
|
||||
|
||||
pdf_parser = None
|
||||
if tenant_id:
|
||||
|
|
@ -85,6 +140,9 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese"
|
|||
binary=binary,
|
||||
callback=callback,
|
||||
parse_method=parse_method,
|
||||
lang=mineru_lang,
|
||||
formula_enable=formula_enable,
|
||||
table_enable=table_enable,
|
||||
)
|
||||
return sections, tables, pdf_parser
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -13,10 +13,11 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Optional, Tuple
|
||||
from io import BytesIO
|
||||
from os import PathLike
|
||||
from typing import Callable, Optional
|
||||
|
||||
from deepdoc.parser.mineru_parser import MinerUParser
|
||||
|
||||
|
|
@ -25,7 +26,22 @@ class Base:
|
|||
def __init__(self, key: str | dict, model_name: str, **kwargs):
|
||||
self.model_name = model_name
|
||||
|
||||
def parse_pdf(self, filepath: str, binary=None, **kwargs) -> Tuple[Any, Any]:
|
||||
def parse_pdf(
|
||||
self,
|
||||
filepath: str | PathLike[str],
|
||||
binary: BytesIO | bytes,
|
||||
callback: Optional[Callable] = None,
|
||||
*,
|
||||
output_dir: Optional[str] = None,
|
||||
backend: str = "pipeline",
|
||||
lang: Optional[str] = None,
|
||||
method: str = "auto",
|
||||
server_url: Optional[str] = None,
|
||||
delete_output: bool = True,
|
||||
parse_method: str = "raw",
|
||||
formula_enable: bool = True,
|
||||
table_enable: bool = True,
|
||||
) -> tuple:
|
||||
raise NotImplementedError("Please implement parse_pdf!")
|
||||
|
||||
|
||||
|
|
@ -34,30 +50,40 @@ class MinerUOcrModel(Base, MinerUParser):
|
|||
|
||||
def __init__(self, key: str | dict, model_name: str, **kwargs):
|
||||
Base.__init__(self, key, model_name, **kwargs)
|
||||
config = {}
|
||||
if key:
|
||||
try:
|
||||
config = json.loads(key)
|
||||
except Exception:
|
||||
config = {}
|
||||
config = config["api_key"]
|
||||
self.mineru_api = config.get("mineru_apiserver", os.environ.get("MINERU_APISERVER", ""))
|
||||
self.mineru_output_dir = config.get("mineru_output_dir", os.environ.get("MINERU_OUTPUT_DIR", ""))
|
||||
self.mineru_backend = config.get("mineru_backend", os.environ.get("MINERU_BACKEND", "pipeline"))
|
||||
self.mineru_server_url = config.get("mineru_server_url", os.environ.get("MINERU_SERVER_URL", ""))
|
||||
self.mineru_delete_output = bool(int(config.get("mineru_delete_output", os.environ.get("MINERU_DELETE_OUTPUT", 1))))
|
||||
|
||||
# Use environment variables directly - no database config needed
|
||||
self.mineru_api = os.environ.get("MINERU_APISERVER", "")
|
||||
self.mineru_output_dir = os.environ.get("MINERU_OUTPUT_DIR", "")
|
||||
self.mineru_backend = os.environ.get("MINERU_BACKEND", "pipeline")
|
||||
self.mineru_server_url = os.environ.get("MINERU_SERVER_URL", "")
|
||||
self.mineru_delete_output = os.environ.get("MINERU_DELETE_OUTPUT", "1") == "1"
|
||||
self.mineru_executable = os.environ.get("MINERU_EXECUTABLE", "mineru")
|
||||
|
||||
logging.info(f"Parsed MinerU config: {config}")
|
||||
logging.info(f"MinerU config from env: api={self.mineru_api}, backend={self.mineru_backend}, server_url={self.mineru_server_url}")
|
||||
|
||||
MinerUParser.__init__(self, mineru_path=self.mineru_executable, mineru_api=self.mineru_api, mineru_server_url=self.mineru_server_url)
|
||||
|
||||
def check_available(self, backend: Optional[str] = None, server_url: Optional[str] = None) -> Tuple[bool, str]:
|
||||
def check_available(self, backend: Optional[str] = None, server_url: Optional[str] = None) -> tuple[bool, str]:
|
||||
backend = backend or self.mineru_backend
|
||||
server_url = server_url or self.mineru_server_url
|
||||
return self.check_installation(backend=backend, server_url=server_url)
|
||||
|
||||
def parse_pdf(self, filepath: str, binary=None, callback=None, parse_method: str = "raw", **kwargs):
|
||||
def parse_pdf(
|
||||
self,
|
||||
filepath: str | PathLike[str],
|
||||
binary: BytesIO | bytes,
|
||||
callback: Optional[Callable] = None,
|
||||
*,
|
||||
output_dir: Optional[str] = None,
|
||||
backend: str = "pipeline",
|
||||
lang: Optional[str] = None,
|
||||
method: str = "auto",
|
||||
server_url: Optional[str] = None,
|
||||
delete_output: bool = True,
|
||||
parse_method: str = "raw",
|
||||
formula_enable: bool = True,
|
||||
table_enable: bool = True,
|
||||
) -> tuple:
|
||||
ok, reason = self.check_available()
|
||||
if not ok:
|
||||
raise RuntimeError(f"MinerU not found or server not accessible: {reason}. Please install it via: pip install -U 'mineru[core]'.")
|
||||
|
|
@ -65,12 +91,16 @@ class MinerUOcrModel(Base, MinerUParser):
|
|||
sections, tables = MinerUParser.parse_pdf(
|
||||
self,
|
||||
filepath=filepath,
|
||||
binary=binary,
|
||||
binary=binary, # type: ignore[arg-type]
|
||||
callback=callback,
|
||||
output_dir=self.mineru_output_dir,
|
||||
backend=self.mineru_backend,
|
||||
server_url=self.mineru_server_url,
|
||||
delete_output=self.mineru_delete_output,
|
||||
output_dir=output_dir or self.mineru_output_dir,
|
||||
backend=backend or self.mineru_backend,
|
||||
lang=lang,
|
||||
method=method,
|
||||
server_url=server_url or self.mineru_server_url,
|
||||
delete_output=delete_output if delete_output is not None else self.mineru_delete_output,
|
||||
parse_method=parse_method,
|
||||
formula_enable=formula_enable,
|
||||
table_enable=table_enable,
|
||||
)
|
||||
return sections, tables
|
||||
|
|
|
|||
156
web/src/components/mineru-config-form-field.tsx
Normal file
156
web/src/components/mineru-config-form-field.tsx
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
import { useFormContext, useWatch } from 'react-hook-form';
|
||||
import {
|
||||
FormControl,
|
||||
FormField,
|
||||
FormItem,
|
||||
FormLabel,
|
||||
FormMessage,
|
||||
} from './ui/form';
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from './ui/select';
|
||||
import { Switch } from './ui/switch';
|
||||
|
||||
// MinerU OCR language choices offered in the language <Select>.
// Each tuple is [option value, human-readable label]; the value is what gets
// stored in `parser_config.mineru_lang`.
const MINERU_LANG_OPTIONS: ReadonlyArray<{ value: string; label: string }> = (
  [
    ['ch', 'Chinese (Simplified)'],
    ['en', 'English'],
    ['cyrillic', 'Cyrillic (Russian, Ukrainian, etc.)'],
    ['latin', 'Latin (French, German, Spanish, etc.)'],
    ['korean', 'Korean'],
    ['japan', 'Japanese'],
    ['arabic', 'Arabic'],
    ['th', 'Thai'],
    ['el', 'Greek'],
    ['devanagari', 'Hindi (Devanagari)'],
    ['ta', 'Tamil'],
    ['te', 'Telugu'],
    ['ka', 'Georgian/Kannada'],
    ['chinese_cht', 'Chinese (Traditional)'],
  ] as const
).map(([value, label]) => ({ value, label }));
|
||||
|
||||
/**
 * Report whether the currently selected layout recognizer is a MinerU model.
 *
 * Watches `parser_config.layout_recognize` on the enclosing form and matches
 * case-insensitively on the substring "mineru". MinerU model ids have the
 * form "model-name@MinerU", so the substring test already covers the
 * "@mineru" suffix and no separate `endsWith` check is needed.
 *
 * @returns `true` when the watched value is a string containing "mineru".
 */
function useIsMineruSelected() {
  const { control } = useFormContext();
  const layoutRecognize = useWatch({
    control,
    name: 'parser_config.layout_recognize',
  });

  // The field may be unset or non-string before the form is populated.
  return (
    typeof layoutRecognize === 'string' &&
    layoutRecognize.toLowerCase().includes('mineru')
  );
}
|
||||
|
||||
/**
 * One labelled on/off row bound to a boolean `parser_config.*` form field.
 *
 * The switch renders as checked while the field has no stored value
 * (`field.value ?? true`). `hint`, when given, is shown as small helper text
 * under the row.
 */
function MineruSwitchField({
  name,
  label,
  hint,
}: {
  name: string;
  label: string;
  hint?: string;
}) {
  const form = useFormContext();

  return (
    <FormField
      control={form.control}
      name={name}
      render={({ field }) => (
        <FormItem className="items-center space-y-0">
          <div className="flex items-center">
            <FormLabel className="text-sm text-text-secondary whitespace-wrap w-1/3">
              {label}
            </FormLabel>
            <div className="w-2/3">
              <FormControl>
                <Switch
                  checked={field.value ?? true}
                  onCheckedChange={field.onChange}
                />
              </FormControl>
            </div>
          </div>
          {hint && (
            <div className="text-xs text-muted-foreground mt-1 ml-[33.33%]">
              {hint}
            </div>
          )}
          <FormMessage />
        </FormItem>
      )}
    />
  );
}

/**
 * MinerU-specific parser settings (OCR language, formula recognition, table
 * recognition) for the knowledge-base configuration form.
 *
 * Renders nothing unless the selected layout recognizer is a MinerU model.
 * Must be rendered inside a react-hook-form provider, because it reads the
 * form through `useFormContext`.
 */
export function MineruConfigFormField() {
  const form = useFormContext();
  const isMineruSelected = useIsMineruSelected();

  if (!isMineruSelected) {
    return null;
  }

  return (
    <div className="space-y-4 p-4 border rounded-lg bg-muted/50">
      <div className="text-sm font-medium text-foreground">
        MinerU OCR Settings
      </div>

      {/* MinerU Language Selection */}
      <FormField
        control={form.control}
        name="parser_config.mineru_lang"
        render={({ field }) => (
          <FormItem className="items-center space-y-0">
            <div className="flex items-center">
              <FormLabel className="text-sm text-text-secondary whitespace-wrap w-1/3">
                OCR Language
              </FormLabel>
              <div className="w-2/3">
                <FormControl>
                  {/* Shows 'latin' while no language has been stored yet */}
                  <Select
                    value={field.value || 'latin'}
                    onValueChange={field.onChange}
                  >
                    <SelectTrigger>
                      <SelectValue placeholder="Select language" />
                    </SelectTrigger>
                    <SelectContent>
                      {MINERU_LANG_OPTIONS.map((option) => (
                        <SelectItem key={option.value} value={option.value}>
                          {option.label}
                        </SelectItem>
                      ))}
                    </SelectContent>
                  </Select>
                </FormControl>
              </div>
            </div>
            <FormMessage />
          </FormItem>
        )}
      />

      {/* Formula Recognition Toggle */}
      <MineruSwitchField
        name="parser_config.mineru_formula_enable"
        label="Formula Recognition"
        hint="Disable for Cyrillic/stylized fonts to avoid incorrect LaTeX conversion"
      />

      {/* Table Recognition Toggle */}
      <MineruSwitchField
        name="parser_config.mineru_table_enable"
        label="Table Recognition"
      />
    </div>
  );
}
|
||||
|
|
@ -67,6 +67,11 @@ export interface ParserConfig {
|
|||
tag_kb_ids?: string[];
|
||||
topn_tags?: number;
|
||||
graphrag?: { use_graphrag?: boolean };
|
||||
// MinerU-specific settings
|
||||
mineru_lang?: string;
|
||||
mineru_formula_enable?: boolean;
|
||||
mineru_table_enable?: boolean;
|
||||
mineru_parse_method?: string;
|
||||
}
|
||||
|
||||
export interface IKnowledgeFileParserConfig {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import {
|
|||
AutoQuestionsFormField,
|
||||
} from '@/components/auto-keywords-form-field';
|
||||
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
|
||||
import { MineruConfigFormField } from '@/components/mineru-config-form-field';
|
||||
import {
|
||||
ConfigurationFormContainer,
|
||||
MainContainer,
|
||||
|
|
@ -13,6 +14,7 @@ export function BookConfiguration() {
|
|||
<MainContainer>
|
||||
<ConfigurationFormContainer>
|
||||
<LayoutRecognizeFormField></LayoutRecognizeFormField>
|
||||
<MineruConfigFormField></MineruConfigFormField>
|
||||
</ConfigurationFormContainer>
|
||||
|
||||
<ConfigurationFormContainer>
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { DelimiterFormField } from '@/components/delimiter-form-field';
|
|||
import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field';
|
||||
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
|
||||
import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field';
|
||||
import { MineruConfigFormField } from '@/components/mineru-config-form-field';
|
||||
import {
|
||||
ConfigurationFormContainer,
|
||||
MainContainer,
|
||||
|
|
@ -17,6 +18,7 @@ export function NaiveConfiguration() {
|
|||
<MainContainer>
|
||||
<ConfigurationFormContainer>
|
||||
<LayoutRecognizeFormField></LayoutRecognizeFormField>
|
||||
<MineruConfigFormField></MineruConfigFormField>
|
||||
<MaxTokenNumberFormField initialValue={512}></MaxTokenNumberFormField>
|
||||
<DelimiterFormField></DelimiterFormField>
|
||||
<EnableTocToggle />
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import {
|
|||
AutoQuestionsFormField,
|
||||
} from '@/components/auto-keywords-form-field';
|
||||
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
|
||||
import { MineruConfigFormField } from '@/components/mineru-config-form-field';
|
||||
import {
|
||||
ConfigurationFormContainer,
|
||||
MainContainer,
|
||||
|
|
@ -13,6 +14,7 @@ export function PaperConfiguration() {
|
|||
<MainContainer>
|
||||
<ConfigurationFormContainer>
|
||||
<LayoutRecognizeFormField></LayoutRecognizeFormField>
|
||||
<MineruConfigFormField></MineruConfigFormField>
|
||||
</ConfigurationFormContainer>
|
||||
|
||||
<ConfigurationFormContainer>
|
||||
|
|
|
|||
|
|
@ -30,6 +30,10 @@ export const formSchema = z
|
|||
topn_tags: z.number().optional(),
|
||||
toc_extraction: z.boolean().optional(),
|
||||
overlapped_percent: z.number().optional(),
|
||||
// MinerU-specific settings
|
||||
mineru_lang: z.string().optional(),
|
||||
mineru_formula_enable: z.boolean().optional(),
|
||||
mineru_table_enable: z.boolean().optional(),
|
||||
raptor: z
|
||||
.object({
|
||||
use_raptor: z.boolean().optional(),
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ export function SavingButton() {
|
|||
onClick={() => {
|
||||
(async () => {
|
||||
try {
|
||||
let beValid = await form.formControl.trigger();
|
||||
let beValid = await form.trigger();
|
||||
if (beValid) {
|
||||
form.handleSubmit(async (values) => {
|
||||
console.log('saveKnowledgeConfiguration: ', values);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue