Improved empty/whitespace file handling
- Better detection of whitespace-only files
- Changed error to warning for empty chunks
This commit is contained in:
parent
bd50827ffc
commit
98150e80b8
2 changed files with 4 additions and 4 deletions
|
|
@@ -714,9 +714,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
||||||
|
|
||||||
# Insert into the RAG queue
|
# Insert into the RAG queue
|
||||||
if content:
|
if content:
|
||||||
|
# Check if content contains only whitespace characters
|
||||||
if content == "\n":
|
if not content.strip():
|
||||||
logger.info(f"File appears to be empty. file_paths={file_path.name}")
|
logger.warning(f"File contains only whitespace characters. file_paths={file_path.name}")
|
||||||
|
|
||||||
await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
|
await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
|
||||||
logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
|
logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
|
||||||
|
|
|
||||||
|
|
@@ -966,7 +966,7 @@ class LightRAG:
|
||||||
}
|
}
|
||||||
|
|
||||||
if not chunks:
|
if not chunks:
|
||||||
logger.error("No document chunks to process")
|
logger.warning("No document chunks to process")
|
||||||
|
|
||||||
# Process document in two stages
|
# Process document in two stages
|
||||||
# Stage 1: Process text chunks and docs (parallel execution)
|
# Stage 1: Process text chunks and docs (parallel execution)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue