Fix: missing parameters in by_plaintext method for PDF naive mode
This commit is contained in:
parent
3fe71ab7dd
commit
ae22d053c7
1 changed file with 7 additions and 1 deletion
|
|
@@ -111,7 +111,13 @@ def by_tcadp(filename, binary=None, from_page=0, to_page=100000, lang="Chinese",
|
||||||
|
|
||||||
|
|
||||||
def by_plaintext(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
|
def by_plaintext(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
|
||||||
if kwargs.get("layout_recognizer", "") == "Plain Text":
|
parser_config = kwargs.get("parser_config", {})
|
||||||
|
layout_recognizer = parser_config.get("layout_recognize", "DeepDOC")
|
||||||
|
|
||||||
|
if isinstance(layout_recognizer, bool):
|
||||||
|
layout_recognizer = "DeepDOC" if layout_recognizer else "Plain Text"
|
||||||
|
|
||||||
|
if layout_recognizer == "Plain Text":
|
||||||
pdf_parser = PlainParser()
|
pdf_parser = PlainParser()
|
||||||
else:
|
else:
|
||||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT, llm_name=kwargs.get("layout_recognizer", ""), lang=kwargs.get("lang", "Chinese"))
|
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT, llm_name=kwargs.get("layout_recognizer", ""), lang=kwargs.get("lang", "Chinese"))
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue