Improve handling of position extraction in manual.py

Refactor position extraction logic to handle empty cases: an empty position string and an empty page-number list.
This commit is contained in:
Magicbook1108 2025-12-09 17:23:17 +08:00 committed by GitHub
parent d992b46c3b
commit b970d4b425
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -229,11 +229,12 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
txt, layoutno, poss = section txt, layoutno, poss = section
if isinstance(poss, str): if isinstance(poss, str):
if poss:
poss = pdf_parser.extract_positions(poss) poss = pdf_parser.extract_positions(poss)
first = poss[0] # tuple: ([pn], x1, x2, y1, y2) first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
pn = first[0] pn = first[0]
if isinstance(pn, list): if isinstance(pn, list) and pn:
pn = pn[0] # [pn] -> pn pn = pn[0] # [pn] -> pn
poss[0] = (pn, *first[1:]) poss[0] = (pn, *first[1:])