From 87de2b3e9e5d455a84bb93b2492ce4673d3faed9 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 19 Nov 2025 04:26:41 +0800
Subject: [PATCH] Update XLSX extraction documentation to reflect current implementation

---
 lightrag/api/routers/document_routes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py
index 5775c4da..8839811c 100644
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -1063,7 +1063,7 @@ def _extract_xlsx(file_bytes: bytes) -> str:
     - Special characters (tabs, newlines, backslashes) are escaped to prevent structure corruption
     - Column alignment is preserved across all rows to maintain tabular structure
     - Empty rows are preserved as blank lines to maintain row structure
-    - Two-pass processing: determines max column width, then extracts with consistent alignment
+    - Uses sheet.max_column to determine column width efficiently
 
     Args:
         file_bytes: XLSX file content as bytes