From 41bdf792aea4a69ad29fbe40d41851728a89efb7 Mon Sep 17 00:00:00 2001 From: buua436 Date: Thu, 20 Nov 2025 17:47:41 +0800 Subject: [PATCH] update --- deepdoc/parser/pdf_parser.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 95ab40f97..6d8431c82 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -379,7 +379,7 @@ class RAGFlowPdfParser: x0s = [] for x in x0s_raw: if abs(x - min_x0) < INDENT_TOL: - x0s.append([min_x0]) # 缩进视为同一列 + x0s.append([min_x0]) else: x0s.append([x]) x0s = np.array(x0s, dtype=float) @@ -437,13 +437,6 @@ class RAGFlowPdfParser: for b in bxs: grouped[b["col_id"]].append(b) - logging.info(f"\n======= Page {pg} column grouping =======") - for col_id, group in grouped.items(): - logging.info(f"Column {col_id}: {len(group)} boxes") - for item in group: - logging.info(f" -> box: x0={item['x0']}, x1={item['x1']}, y0={item['top']}, y1={item['bottom']}") - logging.info("======================================\n") - return boxes def _text_merge(self, zoomin=3):