Preserve blank paragraphs in DOCX extraction to maintain spacing

• Remove text emptiness check
• Always append paragraph text
• Maintain document formatting
• Preserve original spacing
This commit is contained in:
yangdx 2025-11-19 02:03:10 +08:00
parent fa887d811b
commit 186c8f0e16

View file

@@ -1006,8 +1006,8 @@ def _extract_docx(file_bytes: bytes) -> str:
paragraph = Paragraph(element, doc)
text = paragraph.text.strip()
-if text:
-content_parts.append(text)
+# Always append to preserve document spacing (including blank paragraphs)
+content_parts.append(text)
# Check if element is a table
elif element.tag.endswith("tbl"):