Fix: match MinerU content types by checking string literals

This commit is contained in:
少卿 2025-12-09 19:15:41 +08:00
parent 136d43b0fa
commit 0d298cd5c9

View file

@@ -849,7 +849,7 @@ class MinerUParser(RAGFlowPdfParser):
return
img_root = subdir / "generated_images"
img_root.mkdir(parents=True, exist_ok=True)
text_types = {MinerUContentType.TEXT, MinerUContentType.LIST, MinerUContentType.CODE, MinerUContentType.HEADER}
text_types = {"text", "list", "code", "header"}
generated = 0
for idx, item in enumerate(outputs):
if item.get("type") not in text_types: