From 49cb51b5dc2cfc5b63ffc86155de688f31e395c1 Mon Sep 17 00:00:00 2001 From: xuewei <728857235@qq.com> Date: Sat, 5 Jul 2025 13:47:47 +0800 Subject: [PATCH 1/4] =?UTF-8?q?PDF=E6=96=87=E4=BB=B6=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E4=B8=8D=E5=88=B0=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lightrag/api/routers/document_routes.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index c25f7241..80e1ba56 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -714,6 +714,10 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: # Insert into the RAG queue if content: + + if content == "\n": + logger.info(f"File appears to be empty. file_paths={file_path.name}") + await rag.apipeline_enqueue_documents(content, file_paths=file_path.name) logger.info(f"Successfully fetched and enqueued file: {file_path.name}") return True From 648a87653f616cb321a441e2c98805c48cd11b46 Mon Sep 17 00:00:00 2001 From: xuewei <728857235@qq.com> Date: Sat, 5 Jul 2025 14:28:42 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E6=96=87=E6=9C=AC=E5=9D=97=E6=98=AF?= =?UTF-8?q?=E7=A9=BA=E7=99=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lightrag/lightrag.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 2ab9f89a..34ee15e6 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -965,6 +965,9 @@ class LightRAG: ) } + if not chunks: + logger.error("No document chunks to process") + # Process document in two stages # Stage 1: Process text chunks and docs (parallel execution) doc_status_task = asyncio.create_task( From bd50827ffcbbce05fc8649e3611e5bad6641b23a Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 5 Jul 2025 23:00:22 +0800 Subject: [PATCH 3/4] Update `pyproject.toml` to specify Python 3.10 as the minimum required version. --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b87df3bc..2bb1f288 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ description = "LightRAG: Simple and Fast Retrieval-Augmented Generation" readme = "README.md" license = {text = "MIT"} -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python :: 3", @@ -91,3 +91,6 @@ version = {attr = "lightrag.__version__"} [tool.setuptools.package-data] lightrag = ["api/webui/**/*"] + +[tool.ruff] +target-version = "py310" From 98150e80b83a3fb04fb9039f80aaed23b28f8587 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 5 Jul 2025 23:16:39 +0800 Subject: [PATCH 4/4] Improved empty/whitespace file handling - Better detection of whitespace-only files - Changed error to warning for empty chunks --- lightrag/api/routers/document_routes.py | 6 +++--- lightrag/lightrag.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 80e1ba56..4f22947c 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -714,9 +714,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: # Insert into the RAG queue if content: - - if content == "\n": - logger.info(f"File appears to be empty. file_paths={file_path.name}") + # Check if content contains only whitespace characters + if not content.strip(): + logger.warning(f"File contains only whitespace characters. file_paths={file_path.name}") await rag.apipeline_enqueue_documents(content, file_paths=file_path.name) logger.info(f"Successfully fetched and enqueued file: {file_path.name}") diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 34ee15e6..47e484f0 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -966,7 +966,7 @@ class LightRAG: } if not chunks: - logger.error("No document chunks to process") + logger.warning("No document chunks to process") # Process document in two stages # Stage 1: Process text chunks and docs (parallel execution)