update
This commit is contained in:
parent
ff40d37251
commit
41bdf792ae
1 changed file with 1 addition and 8 deletions
|
|
@@ -379,7 +379,7 @@ class RAGFlowPdfParser:
|
||||||
x0s = []
|
x0s = []
|
||||||
for x in x0s_raw:
|
for x in x0s_raw:
|
||||||
if abs(x - min_x0) < INDENT_TOL:
|
if abs(x - min_x0) < INDENT_TOL:
|
||||||
x0s.append([min_x0]) # 缩进视为同一列
|
x0s.append([min_x0])
|
||||||
else:
|
else:
|
||||||
x0s.append([x])
|
x0s.append([x])
|
||||||
x0s = np.array(x0s, dtype=float)
|
x0s = np.array(x0s, dtype=float)
|
||||||
|
|
@@ -437,13 +437,6 @@ class RAGFlowPdfParser:
|
||||||
for b in bxs:
|
for b in bxs:
|
||||||
grouped[b["col_id"]].append(b)
|
grouped[b["col_id"]].append(b)
|
||||||
|
|
||||||
logging.info(f"\n======= Page {pg} column grouping =======")
|
|
||||||
for col_id, group in grouped.items():
|
|
||||||
logging.info(f"Column {col_id}: {len(group)} boxes")
|
|
||||||
for item in group:
|
|
||||||
logging.info(f" -> box: x0={item['x0']}, x1={item['x1']}, y0={item['top']}, y1={item['bottom']}")
|
|
||||||
logging.info("======================================\n")
|
|
||||||
|
|
||||||
return boxes
|
return boxes
|
||||||
|
|
||||||
def _text_merge(self, zoomin=3):
|
def _text_merge(self, zoomin=3):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue