Clamp rerank chunk overlap to 0 when max_tokens is 1.

The clamp changes from max(1, max_tokens - 1) to max(0, max_tokens - 1), and a
regression test covers max_tokens=1 with overlap_tokens=5.
NOTE(review): line breaks and indentation were reconstructed from a
single-line copy of this patch; confirm context whitespace before applying.

diff --git a/lightrag/rerank.py b/lightrag/rerank.py
index 1b5d7612..81632b71 100644
--- a/lightrag/rerank.py
+++ b/lightrag/rerank.py
@@ -43,7 +43,9 @@ def chunk_documents_for_rerank(
     # If overlap_tokens >= max_tokens, the chunking loop would hang
     if overlap_tokens >= max_tokens:
         original_overlap = overlap_tokens
-        overlap_tokens = max(1, max_tokens - 1)
+        # Ensure overlap is at least 1 token less than max to guarantee progress
+        # For very small max_tokens (e.g., 1), set overlap to 0
+        overlap_tokens = max(0, max_tokens - 1)
         logger.warning(
             f"overlap_tokens ({original_overlap}) must be less than max_tokens ({max_tokens}). "
             f"Clamping to {overlap_tokens} to prevent infinite loop."
diff --git a/tests/test_overlap_validation.py b/tests/test_overlap_validation.py
index da364719..7f84a3cf 100644
--- a/tests/test_overlap_validation.py
+++ b/tests/test_overlap_validation.py
@@ -98,3 +98,16 @@ class TestOverlapValidation:
         assert "short doc" in chunked_docs
         # Verify doc_indices maps correctly
         assert doc_indices[-1] == 1  # Last chunk is from second document
+
+    def test_edge_case_max_tokens_one(self):
+        """Test edge case where max_tokens=1"""
+        documents = [" ".join([f"word{i}" for i in range(20)])]
+
+        # max_tokens=1, overlap_tokens=5 should clamp to 0
+        chunked_docs, doc_indices = chunk_documents_for_rerank(
+            documents, max_tokens=1, overlap_tokens=5
+        )
+
+        # Should complete without hanging
+        assert len(chunked_docs) > 0
+        assert all(idx == 0 for idx in doc_indices)