fix(mineru): robust coordinate conversion in crop() fallback for 0-1000 tags
- Implement coordinate conversion (0-1000 normalized -> pixels) in the crop() fallback loop
- Ensure correct cropping from page_images when the cache lookup fails
- Keep behavior consistent with the _raw_line_tag changes (0-1000 normalized coordinates)
This commit is contained in:
parent
8049cb9275
commit
1c7bc47579
1 changed file with 16 additions and 0 deletions
|
|
@@ -392,6 +392,22 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
return None, None
|
||||
return
|
||||
|
||||
# Convert 0-1000 normalized coordinates to pixels using page dimensions
|
||||
# This ensures compatibility with GAP/padding logic and correct cropping
|
||||
pixel_poss = []
|
||||
for pns, left, right, top, bottom in poss:
|
||||
if not pns: continue
|
||||
page_idx = pns[0]
|
||||
if not (0 <= page_idx < page_count): continue
|
||||
|
||||
W, H = self.page_images[page_idx].size
|
||||
x0 = left * W / 1000.0
|
||||
x1 = right * W / 1000.0
|
||||
y0 = top * H / 1000.0
|
||||
y1 = bottom * H / 1000.0
|
||||
pixel_poss.append((pns, x0, x1, y0, y1))
|
||||
poss = pixel_poss
|
||||
|
||||
# 避免超长拼接图 - 只取首个位置
|
||||
if len(poss) > 1:
|
||||
poss = [poss[0]]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue