diff --git a/rag/app/manual.py b/rag/app/manual.py index 272f7148f..3fcd7b72f 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -229,13 +229,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt, layoutno, poss = section if isinstance(poss, str): - poss = pdf_parser.extract_positions(poss) - first = poss[0] # tuple: ([pn], x1, x2, y1, y2) - pn = first[0] - - if isinstance(pn, list): - pn = pn[0] # [pn] -> pn - poss[0] = (pn, *first[1:]) + if poss: + poss = pdf_parser.extract_positions(poss) + first = poss[0] # tuple: ([pn], x1, x2, y1, y2) + pn = first[0] + + if isinstance(pn, list) and pn: + pn = pn[0] # [pn] -> pn + poss[0] = (pn, *first[1:]) return (txt, layoutno, poss) @@ -345,4 +346,4 @@ if __name__ == "__main__": pass - chunk(sys.argv[1], callback=dummy) \ No newline at end of file + chunk(sys.argv[1], callback=dummy)