From 98150e80b83a3fb04fb9039f80aaed23b28f8587 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 5 Jul 2025 23:16:39 +0800 Subject: [PATCH] Improved empty/whitespace file handling - Better detection of whitespace-only files - Changed error to warning for empty chunks --- lightrag/api/routers/document_routes.py | 6 +++--- lightrag/lightrag.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 80e1ba56..4f22947c 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -714,9 +714,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: # Insert into the RAG queue if content: - - if content == "\n": - logger.info(f"File appears to be empty. file_paths={file_path.name}") + # Check if content contains only whitespace characters + if not content.strip(): + logger.warning(f"File contains only whitespace characters. file_paths={file_path.name}") await rag.apipeline_enqueue_documents(content, file_paths=file_path.name) logger.info(f"Successfully fetched and enqueued file: {file_path.name}") diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 34ee15e6..47e484f0 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -966,7 +966,7 @@ class LightRAG: } if not chunks: - logger.error("No document chunks to process") + logger.warning("No document chunks to process") # Process document in two stages # Stage 1: Process text chunks and docs (parallel execution)