Improve handling of position extraction in manual.py
Refactor position extraction logic to handle empty cases.
This commit is contained in:
parent
d992b46c3b
commit
b970d4b425
1 changed file with 9 additions and 8 deletions
|
|
@@ -229,13 +229,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||
|
||||
txt, layoutno, poss = section
|
||||
if isinstance(poss, str):
|
||||
poss = pdf_parser.extract_positions(poss)
|
||||
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
|
||||
pn = first[0]
|
||||
|
||||
if isinstance(pn, list):
|
||||
pn = pn[0] # [pn] -> pn
|
||||
poss[0] = (pn, *first[1:])
|
||||
if poss:
|
||||
poss = pdf_parser.extract_positions(poss)
|
||||
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
|
||||
pn = first[0]
|
||||
|
||||
if isinstance(pn, list) and pn:
|
||||
pn = pn[0] # [pn] -> pn
|
||||
poss[0] = (pn, *first[1:])
|
||||
|
||||
return (txt, layoutno, poss)
|
||||
|
||||
|
|
@@ -345,4 +346,4 @@ if __name__ == "__main__":
|
|||
pass
|
||||
|
||||
|
||||
chunk(sys.argv[1], callback=dummy)
|
||||
chunk(sys.argv[1], callback=dummy)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue