Preserve blank paragraphs in DOCX extraction to maintain spacing
• Remove the empty-text check
• Always append paragraph text
• Maintain document formatting
• Preserve original spacing
This commit is contained in:
parent
fa887d811b
commit
186c8f0e16
1 changed file with 2 additions and 2 deletions
|
|
@@ -1006,8 +1006,8 @@ def _extract_docx(file_bytes: bytes) -> str:
|
||||||
|
|
||||||
paragraph = Paragraph(element, doc)
|
paragraph = Paragraph(element, doc)
|
||||||
text = paragraph.text.strip()
|
text = paragraph.text.strip()
|
||||||
if text:
|
# Always append to preserve document spacing (including blank paragraphs)
|
||||||
content_parts.append(text)
|
content_parts.append(text)
|
||||||
|
|
||||||
# Check if element is a table
|
# Check if element is a table
|
||||||
elif element.tag.endswith("tbl"):
|
elif element.tag.endswith("tbl"):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue