Improve handling of position extraction in manual.py

Refactor the position extraction logic to handle empty cases: the first position and its page-number list are now indexed only when they are non-empty.
This commit is contained in:
Magicbook1108 2025-12-09 17:23:17 +08:00 committed by GitHub
parent d992b46c3b
commit b970d4b425
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -229,13 +229,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
txt, layoutno, poss = section
if isinstance(poss, str):
poss = pdf_parser.extract_positions(poss)
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
pn = first[0]
if isinstance(pn, list):
pn = pn[0] # [pn] -> pn
poss[0] = (pn, *first[1:])
if poss:
poss = pdf_parser.extract_positions(poss)
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
pn = first[0]
if isinstance(pn, list) and pn:
pn = pn[0] # [pn] -> pn
poss[0] = (pn, *first[1:])
return (txt, layoutno, poss)
@ -345,4 +346,4 @@ if __name__ == "__main__":
pass
chunk(sys.argv[1], callback=dummy)
chunk(sys.argv[1], callback=dummy)