From 186c8f0e16fa08f239527d982a8d68d21482defe Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 19 Nov 2025 02:03:10 +0800 Subject: [PATCH] Preserve blank paragraphs in DOCX extraction to maintain spacing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Remove text emptiness check • Always append paragraph text • Maintain document formatting • Preserve original spacing --- lightrag/api/routers/document_routes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 15bf2508..1726e197 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -1006,8 +1006,8 @@ def _extract_docx(file_bytes: bytes) -> str: paragraph = Paragraph(element, doc) text = paragraph.text.strip() - if text: - content_parts.append(text) + # Always append to preserve document spacing (including blank paragraphs) + content_parts.append(text) # Check if element is a table elif element.tag.endswith("tbl"):