cherry-pick 2b160163
This commit is contained in:
parent
2a247bdda1
commit
a3fb244631
1 changed file with 13 additions and 3 deletions
|
|
@@ -1133,10 +1133,20 @@ def _extract_xlsx(file_bytes: bytes) -> str:
|
||||||
safe_title = escape_sheet_title(sheet.title)
|
safe_title = escape_sheet_title(sheet.title)
|
||||||
content_parts.append(f"{sheet_separator} Sheet: {safe_title} {sheet_separator}")
|
content_parts.append(f"{sheet_separator} Sheet: {safe_title} {sheet_separator}")
|
||||||
|
|
||||||
# Use sheet.max_column to get the maximum column width directly
|
# Two-pass approach to preserve column alignment without storing rows in memory:
|
||||||
max_columns = sheet.max_column if sheet.max_column else 0
|
# Pass 1: Scan to determine the maximum column width (memory-efficient)
|
||||||
|
max_columns = 0
|
||||||
|
for row in sheet.iter_rows(values_only=True):
|
||||||
|
last_nonempty_idx = -1
|
||||||
|
for idx, cell in enumerate(row):
|
||||||
|
# Check if cell has meaningful content (not None or empty string)
|
||||||
|
if cell is not None and str(cell).strip():
|
||||||
|
last_nonempty_idx = idx
|
||||||
|
|
||||||
# Extract rows with consistent width to preserve column alignment
|
if last_nonempty_idx >= 0:
|
||||||
|
max_columns = max(max_columns, last_nonempty_idx + 1)
|
||||||
|
|
||||||
|
# Pass 2: Extract rows with consistent width to preserve column alignment
|
||||||
for row in sheet.iter_rows(values_only=True):
|
for row in sheet.iter_rows(values_only=True):
|
||||||
row_parts = []
|
row_parts = []
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue