cherry-pick 4438ba41

2025-12-04 19:15:04 +08:00 · 2025-12-04 19:15:04 +08:00 · 5a9677396b
commit 5a9677396b
parent d3d59b0dca
1 changed file with 5 additions and 6 deletions
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -1006,8 +1006,8 @@ def _extract_docx(file_bytes: bytes) -> str:
            paragraph = Paragraph(element, doc)
            text = paragraph.text.strip()
-            # Always append to preserve document spacing (including blank paragraphs)
+            if text:
-            content_parts.append(text)
+                content_parts.append(text)
        # Check if element is a table
        elif element.tag.endswith("tbl"):
@@ -1021,10 +1021,9 @@ def _extract_docx(file_bytes: bytes) -> str:
                row_text = []
                for cell in row.cells:
                    cell_text = cell.text.strip()
-                    # Always append cell text to preserve column structure
+                    if cell_text:
-                    row_text.append(cell_text)
+                        row_text.append(cell_text)
-                # Only add row if at least one cell has content
+                if row_text:
-                if any(cell for cell in row_text):
                    content_parts.append("\t".join(row_text))
    return "\n".join(content_parts)