cherry-pick fa887d81
This commit is contained in:
parent
7e53eaabee
commit
4501740849
1 changed file with 4 additions and 4 deletions
|
|
@@ -1005,9 +1005,9 @@ def _extract_docx(file_bytes: bytes) -> str:
|
||||||
in_table = False
|
in_table = False
|
||||||
|
|
||||||
paragraph = Paragraph(element, doc)
|
paragraph = Paragraph(element, doc)
|
||||||
text = paragraph.text
|
text = paragraph.text.strip()
|
||||||
# Always append to preserve document spacing (including blank paragraphs)
|
if text:
|
||||||
content_parts.append(text)
|
content_parts.append(text)
|
||||||
|
|
||||||
# Check if element is a table
|
# Check if element is a table
|
||||||
elif element.tag.endswith("tbl"):
|
elif element.tag.endswith("tbl"):
|
||||||
|
|
@@ -1020,7 +1020,7 @@ def _extract_docx(file_bytes: bytes) -> str:
|
||||||
for row in table.rows:
|
for row in table.rows:
|
||||||
row_text = []
|
row_text = []
|
||||||
for cell in row.cells:
|
for cell in row.cells:
|
||||||
cell_text = cell.text
|
cell_text = cell.text.strip()
|
||||||
# Always append cell text to preserve column structure
|
# Always append cell text to preserve column structure
|
||||||
row_text.append(cell_text)
|
row_text.append(cell_text)
|
||||||
# Only add row if at least one cell has content
|
# Only add row if at least one cell has content
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue