Improved empty/whitespace file handling

- Detect whitespace-only files with `content.strip()` instead of matching only a single "\n", and log them at warning level instead of info
- Log a warning instead of an error when a document has no chunks to process
This commit is contained in:
yangdx 2025-07-05 23:16:39 +08:00
parent bd50827ffc
commit 98150e80b8
2 changed files with 4 additions and 4 deletions

View file

@ -714,9 +714,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
# Insert into the RAG queue
if content:
if content == "\n":
logger.info(f"File appears to be empty. file_paths={file_path.name}")
# Check if content contains only whitespace characters
if not content.strip():
logger.warning(f"File contains only whitespace characters. file_paths={file_path.name}")
await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
logger.info(f"Successfully fetched and enqueued file: {file_path.name}")

View file

@ -966,7 +966,7 @@ class LightRAG:
}
if not chunks:
logger.error("No document chunks to process")
logger.warning("No document chunks to process")
# Process document in two stages
# Stage 1: Process text chunks and docs (parallel execution)