fix(mineru): robust coordinate conversion in crop() fallback for 0-1000 tags
- Implement coordinate conversion (normalized -> pixels) in the crop() fallback loop
- Ensure correct cropping from page_images when the cache lookup fails
- Work consistently with the _raw_line_tag (0-1000 normalized) changes
This commit is contained in:
parent
8049cb9275
commit
1c7bc47579
1 changed file with 16 additions and 0 deletions
|
|
@@ -392,6 +392,22 @@ class MinerUParser(RAGFlowPdfParser):
|
||||||
return None, None
|
return None, None
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Convert 0-1000 normalized coordinates to pixels using page dimensions
|
||||||
|
# This ensures compatibility with GAP/padding logic and correct cropping
|
||||||
|
pixel_poss = []
|
||||||
|
for pns, left, right, top, bottom in poss:
|
||||||
|
if not pns: continue
|
||||||
|
page_idx = pns[0]
|
||||||
|
if not (0 <= page_idx < page_count): continue
|
||||||
|
|
||||||
|
W, H = self.page_images[page_idx].size
|
||||||
|
x0 = left * W / 1000.0
|
||||||
|
x1 = right * W / 1000.0
|
||||||
|
y0 = top * H / 1000.0
|
||||||
|
y1 = bottom * H / 1000.0
|
||||||
|
pixel_poss.append((pns, x0, x1, y0, y1))
|
||||||
|
poss = pixel_poss
|
||||||
|
|
||||||
# 避免超长拼接图 - 只取首个位置
|
# 避免超长拼接图 - 只取首个位置
|
||||||
if len(poss) > 1:
|
if len(poss) > 1:
|
||||||
poss = [poss[0]]
|
poss = [poss[0]]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue