From 998d15bca14b42efe5469532b9cf9ee2307fe175 Mon Sep 17 00:00:00 2001 From: lih Date: Thu, 20 Nov 2025 15:58:48 +0800 Subject: [PATCH] Fix: missing parameters in by_plaintext method for PDF naive mode --- rag/app/book.py | 1 + rag/app/laws.py | 1 + rag/app/manual.py | 1 + rag/app/naive.py | 8 +------- rag/app/one.py | 1 + rag/app/presentation.py | 1 + 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/rag/app/book.py b/rag/app/book.py index 5ea28d40d..5bdaec72d 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -113,6 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/laws.py b/rag/app/laws.py index dd97e4e3a..ba2592833 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -172,6 +172,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/manual.py b/rag/app/manual.py index 5808e2498..4f9de40c7 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -213,6 +213,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/naive.py b/rag/app/naive.py index 099e2f6fd..49dca17af 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -111,13 +111,7 @@ def by_tcadp(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", def by_plaintext(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs): - parser_config = kwargs.get("parser_config", {}) - layout_recognizer = parser_config.get("layout_recognize", "DeepDOC") - - if isinstance(layout_recognizer, bool): - layout_recognizer = "DeepDOC" if layout_recognizer else "Plain Text" - - if layout_recognizer == "Plain Text": + if kwargs.get("layout_recognizer", "") == "Plain Text": pdf_parser = PlainParser() else: vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT, llm_name=kwargs.get("layout_recognizer", ""), lang=kwargs.get("lang", "Chinese")) diff --git a/rag/app/one.py b/rag/app/one.py index 5574aaa51..7cd1bb785 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -99,6 +99,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/presentation.py b/rag/app/presentation.py index cd1d308ec..6a872528f 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -142,6 +142,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs )