cherry-pick 4438ba41
This commit is contained in:
parent
d3d59b0dca
commit
5a9677396b
1 changed file with 5 additions and 6 deletions
|
|
@@ -1006,8 +1006,8 @@ def _extract_docx(file_bytes: bytes) -> str:
|
|||
|
||||
paragraph = Paragraph(element, doc)
|
||||
text = paragraph.text.strip()
|
||||
# Always append to preserve document spacing (including blank paragraphs)
|
||||
content_parts.append(text)
|
||||
if text:
|
||||
content_parts.append(text)
|
||||
|
||||
# Check if element is a table
|
||||
elif element.tag.endswith("tbl"):
|
||||
|
|
@@ -1021,10 +1021,9 @@ def _extract_docx(file_bytes: bytes) -> str:
|
|||
row_text = []
|
||||
for cell in row.cells:
|
||||
cell_text = cell.text.strip()
|
||||
# Always append cell text to preserve column structure
|
||||
row_text.append(cell_text)
|
||||
# Only add row if at least one cell has content
|
||||
if any(cell for cell in row_text):
|
||||
if cell_text:
|
||||
row_text.append(cell_text)
|
||||
if row_text:
|
||||
content_parts.append("\t".join(row_text))
|
||||
|
||||
return "\n".join(content_parts)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue