From 8835fc244a90017b8fc98f60017ae4e78e975c0b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 24 Nov 2025 03:43:05 +0000 Subject: [PATCH] Improve edge case handling for max_tokens=1 Co-authored-by: netbrah <162479981+netbrah@users.noreply.github.com> --- lightrag/rerank.py | 4 +++- tests/test_overlap_validation.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lightrag/rerank.py b/lightrag/rerank.py index 1b5d7612..81632b71 100644 --- a/lightrag/rerank.py +++ b/lightrag/rerank.py @@ -43,7 +43,9 @@ def chunk_documents_for_rerank( # If overlap_tokens >= max_tokens, the chunking loop would hang if overlap_tokens >= max_tokens: original_overlap = overlap_tokens - overlap_tokens = max(1, max_tokens - 1) + # Ensure overlap is at least 1 token less than max to guarantee progress + # For very small max_tokens (e.g., 1), set overlap to 0 + overlap_tokens = max(0, max_tokens - 1) logger.warning( f"overlap_tokens ({original_overlap}) must be less than max_tokens ({max_tokens}). " f"Clamping to {overlap_tokens} to prevent infinite loop." diff --git a/tests/test_overlap_validation.py b/tests/test_overlap_validation.py index da364719..7f84a3cf 100644 --- a/tests/test_overlap_validation.py +++ b/tests/test_overlap_validation.py @@ -98,3 +98,16 @@ class TestOverlapValidation: assert "short doc" in chunked_docs # Verify doc_indices maps correctly assert doc_indices[-1] == 1 # Last chunk is from second document + + def test_edge_case_max_tokens_one(self): + """Test edge case where max_tokens=1""" + documents = [" ".join([f"word{i}" for i in range(20)])] + + # max_tokens=1, overlap_tokens=5 should clamp to 0 + chunked_docs, doc_indices = chunk_documents_for_rerank( + documents, max_tokens=1, overlap_tokens=5 + ) + + # Should complete without hanging + assert len(chunked_docs) > 0 + assert all(idx == 0 for idx in doc_indices)