Improve edge case handling for max_tokens=1

Co-authored-by: netbrah <162479981+netbrah@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2025-11-24 03:43:05 +00:00
parent 1d6ea0c5f7
commit 8835fc244a
2 changed files with 16 additions and 1 deletions

View file

@ -43,7 +43,9 @@ def chunk_documents_for_rerank(
# If overlap_tokens >= max_tokens, the chunking loop would hang
if overlap_tokens >= max_tokens:
original_overlap = overlap_tokens
overlap_tokens = max(1, max_tokens - 1)
# Ensure overlap is at least 1 token less than max to guarantee progress
# For very small max_tokens (e.g., 1), set overlap to 0
overlap_tokens = max(0, max_tokens - 1)
logger.warning(
f"overlap_tokens ({original_overlap}) must be less than max_tokens ({max_tokens}). "
f"Clamping to {overlap_tokens} to prevent infinite loop."

View file

@ -98,3 +98,16 @@ class TestOverlapValidation:
assert "short doc" in chunked_docs
# Verify doc_indices maps correctly
assert doc_indices[-1] == 1 # Last chunk is from second document
def test_edge_case_max_tokens_one(self):
    """Chunking with max_tokens=1 must terminate and map every chunk to document 0."""
    # A single 20-word document: "word0 word1 ... word19".
    words = [f"word{i}" for i in range(20)]
    single_doc = " ".join(words)

    # overlap_tokens (5) exceeds max_tokens (1), so it is expected to be
    # clamped to max(0, 1 - 1) == 0 instead of stalling the chunking loop.
    chunked_docs, doc_indices = chunk_documents_for_rerank(
        [single_doc], max_tokens=1, overlap_tokens=5
    )

    # Reaching this point means the call returned rather than hanging.
    assert len(chunked_docs) > 0
    # Only one document was supplied, so no chunk may point anywhere but index 0.
    assert not any(idx != 0 for idx in doc_indices)