Improved empty/whitespace file handling

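- Better detection of whitespace-only files: check `not content.strip()` instead of comparing against a single "\n", and log the case as a warning instead of info
- Changed the "No document chunks to process" log from error to warning for empty chunks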
This commit is contained in:
yangdx 2025-07-05 23:16:39 +08:00
parent bd50827ffc
commit 98150e80b8
2 changed files with 4 additions and 4 deletions

View file

@ -714,9 +714,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
# Insert into the RAG queue # Insert into the RAG queue
if content: if content:
# Check if content contains only whitespace characters
if content == "\n": if not content.strip():
logger.info(f"File appears to be empty. file_paths={file_path.name}") logger.warning(f"File contains only whitespace characters. file_paths={file_path.name}")
await rag.apipeline_enqueue_documents(content, file_paths=file_path.name) await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
logger.info(f"Successfully fetched and enqueued file: {file_path.name}") logger.info(f"Successfully fetched and enqueued file: {file_path.name}")

View file

@ -966,7 +966,7 @@ class LightRAG:
} }
if not chunks: if not chunks:
logger.error("No document chunks to process") logger.warning("No document chunks to process")
# Process document in two stages # Process document in two stages
# Stage 1: Process text chunks and docs (parallel execution) # Stage 1: Process text chunks and docs (parallel execution)