Improve handling of position extraction in manual.py
Refactor position extraction logic to handle empty cases.
This commit is contained in:
parent
d992b46c3b
commit
b970d4b425
1 changed file with 9 additions and 8 deletions
|
|
@@ -229,13 +229,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
|
|
||||||
txt, layoutno, poss = section
|
txt, layoutno, poss = section
|
||||||
if isinstance(poss, str):
|
if isinstance(poss, str):
|
||||||
poss = pdf_parser.extract_positions(poss)
|
if poss:
|
||||||
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
|
poss = pdf_parser.extract_positions(poss)
|
||||||
pn = first[0]
|
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
|
||||||
|
pn = first[0]
|
||||||
if isinstance(pn, list):
|
|
||||||
pn = pn[0] # [pn] -> pn
|
if isinstance(pn, list) and pn:
|
||||||
poss[0] = (pn, *first[1:])
|
pn = pn[0] # [pn] -> pn
|
||||||
|
poss[0] = (pn, *first[1:])
|
||||||
|
|
||||||
return (txt, layoutno, poss)
|
return (txt, layoutno, poss)
|
||||||
|
|
||||||
|
|
@@ -345,4 +346,4 @@ if __name__ == "__main__":
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
chunk(sys.argv[1], callback=dummy)
|
chunk(sys.argv[1], callback=dummy)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue