diff --git a/lightrag/exceptions.py b/lightrag/exceptions.py index e6a616cd..709f294d 100644 --- a/lightrag/exceptions.py +++ b/lightrag/exceptions.py @@ -106,6 +106,28 @@ class PipelineCancelledException(Exception): self.message = message +class ChunkTokenLimitExceededError(ValueError): + """Raised when a chunk exceeds the configured token limit.""" + + def __init__( + self, + chunk_tokens: int, + chunk_token_limit: int, + chunk_preview: str | None = None, + ) -> None: + preview = chunk_preview.strip() if chunk_preview else None + truncated_preview = preview[:80] if preview else None + preview_note = f" Preview: '{truncated_preview}'" if truncated_preview else "" + message = ( + f"Chunk token length {chunk_tokens} exceeds chunk_token_size {chunk_token_limit}." + f"{preview_note}" + ) + super().__init__(message) + self.chunk_tokens = chunk_tokens + self.chunk_token_limit = chunk_token_limit + self.chunk_preview = truncated_preview + + class QdrantMigrationError(Exception): """Raised when Qdrant data migration from legacy collections fails.""" diff --git a/lightrag/operate.py b/lightrag/operate.py index f226566b..5f824af0 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -8,7 +8,10 @@ import json_repair from typing import Any, AsyncIterator, overload, Literal from collections import Counter, defaultdict -from lightrag.exceptions import PipelineCancelledException +from lightrag.exceptions import ( + PipelineCancelledException, + ChunkTokenLimitExceededError, +) from lightrag.utils import ( logger, compute_mdhash_id, @@ -109,6 +112,17 @@ def chunking_by_token_size( if split_by_character_only: for chunk in raw_chunks: _tokens = tokenizer.encode(chunk) + if len(_tokens) > chunk_token_size: + logger.warning( + "Chunk split_by_character exceeds token limit: len=%d limit=%d", + len(_tokens), + chunk_token_size, + ) + raise ChunkTokenLimitExceededError( + chunk_tokens=len(_tokens), + chunk_token_limit=chunk_token_size, + chunk_preview=chunk[:120], + ) new_chunks.append((len(_tokens), chunk)) else: for chunk in raw_chunks: diff --git a/tests/test_chunking.py b/tests/test_chunking.py new file mode 100644 index 00000000..1895a36d --- /dev/null +++ b/tests/test_chunking.py @@ -0,0 +1,1066 @@ +import pytest + +from lightrag.exceptions import ChunkTokenLimitExceededError +from lightrag.operate import chunking_by_token_size +from lightrag.utils import Tokenizer, TokenizerInterface + + +class DummyTokenizer(TokenizerInterface): + """Simple 1:1 character-to-token mapping.""" + + def encode(self, content: str): + return [ord(ch) for ch in content] + + def decode(self, tokens): + return "".join(chr(token) for token in tokens) + + +class MultiTokenCharacterTokenizer(TokenizerInterface): + """ + Tokenizer where character-to-token ratio is non-uniform. + This helps catch bugs where code incorrectly counts characters instead of tokens. 
+ + Mapping: + - Uppercase letters: 2 tokens each + - Punctuation (!, ?, .): 3 tokens each + - Other characters: 1 token each + """ + + def encode(self, content: str): + tokens = [] + for ch in content: + if ch.isupper(): # Uppercase = 2 tokens + tokens.extend([ord(ch), ord(ch) + 1000]) + elif ch in ["!", "?", "."]: # Punctuation = 3 tokens + tokens.extend([ord(ch), ord(ch) + 2000, ord(ch) + 3000]) + else: # Regular chars = 1 token + tokens.append(ord(ch)) + return tokens + + def decode(self, tokens): + # Simplified decode for testing + result = [] + i = 0 + while i < len(tokens): + base_token = tokens[i] + # Check if this is part of a multi-token sequence + if ( + i + 2 < len(tokens) + and tokens[i + 1] == base_token + 2000 + and tokens[i + 2] == base_token + 3000 + ): + # 3-token punctuation + result.append(chr(base_token)) + i += 3 + elif i + 1 < len(tokens) and tokens[i + 1] == base_token + 1000: + # 2-token uppercase + result.append(chr(base_token)) + i += 2 + else: + # Single token + result.append(chr(base_token)) + i += 1 + return "".join(result) + + +def make_tokenizer() -> Tokenizer: + return Tokenizer(model_name="dummy", tokenizer=DummyTokenizer()) + + +def make_multi_token_tokenizer() -> Tokenizer: + return Tokenizer(model_name="multi", tokenizer=MultiTokenCharacterTokenizer()) + + +# ============================================================================ +# Tests for split_by_character_only=True (raises error on oversized chunks) +# ============================================================================ + + +@pytest.mark.offline +def test_split_by_character_only_within_limit(): + """Test chunking when all chunks are within token limit.""" + tokenizer = make_tokenizer() + + chunks = chunking_by_token_size( + tokenizer, + "alpha\n\nbeta", + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + assert [chunk["content"] for chunk in chunks] == ["alpha", "beta"] + + +@pytest.mark.offline +def test_split_by_character_only_exceeding_limit_raises(): + """Test that oversized chunks raise ChunkTokenLimitExceededError.""" + tokenizer = make_tokenizer() + oversized = "a" * 12 + + with pytest.raises(ChunkTokenLimitExceededError) as excinfo: + chunking_by_token_size( + tokenizer, + oversized, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=5, + ) + + err = excinfo.value + assert err.chunk_tokens == len(oversized) + assert err.chunk_token_limit == 5 + + +@pytest.mark.offline +def test_chunk_error_includes_preview(): + """Test that error message includes chunk preview.""" + tokenizer = make_tokenizer() + oversized = "x" * 100 + + with pytest.raises(ChunkTokenLimitExceededError) as excinfo: + chunking_by_token_size( + tokenizer, + oversized, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + err = excinfo.value + # Preview should be first 80 chars of a 100-char string + assert err.chunk_preview == "x" * 80 + assert "Preview:" in str(err) + + +@pytest.mark.offline +def test_split_by_character_only_at_exact_limit(): + """Test chunking when chunk is exactly at token limit.""" + tokenizer = make_tokenizer() + exact_size = "a" * 10 + + chunks = chunking_by_token_size( + tokenizer, + exact_size, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + assert len(chunks) == 1 + assert chunks[0]["content"] == exact_size + assert chunks[0]["tokens"] == 10 + + +@pytest.mark.offline +def test_split_by_character_only_one_over_limit(): + """Test that chunk 
with one token over limit raises error.""" + tokenizer = make_tokenizer() + one_over = "a" * 11 + + with pytest.raises(ChunkTokenLimitExceededError) as excinfo: + chunking_by_token_size( + tokenizer, + one_over, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + err = excinfo.value + assert err.chunk_tokens == 11 + assert err.chunk_token_limit == 10 + + +# ============================================================================ +# Tests for split_by_character_only=False (recursive splitting) +# ============================================================================ + + +@pytest.mark.offline +def test_split_recursive_oversized_chunk(): + """Test recursive splitting of oversized chunk with split_by_character_only=False.""" + tokenizer = make_tokenizer() + # 30 chars - should split into chunks of size 10 + oversized = "a" * 30 + + chunks = chunking_by_token_size( + tokenizer, + oversized, + split_by_character="\n\n", + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=0, + ) + + # Should create 3 chunks of 10 tokens each + assert len(chunks) == 3 + assert all(chunk["tokens"] == 10 for chunk in chunks) + assert all(chunk["content"] == "a" * 10 for chunk in chunks) + + +@pytest.mark.offline +def test_split_with_chunk_overlap(): + """ + Test chunk splitting with overlap using distinctive content. + + With distinctive characters, we can verify overlap positions are exact. + Misaligned overlap would produce wrong content and fail the test. + """ + tokenizer = make_tokenizer() + # Each character is unique - enables exact position verification + content = "0123456789abcdefghijklmno" # 25 chars + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=3, + ) + + # With overlap=3, step size = chunk_size - overlap = 10 - 3 = 7 + # Chunks start at positions: 0, 7, 14, 21 + assert len(chunks) == 4 + + # Verify exact content and token counts + assert chunks[0]["tokens"] == 10 + assert chunks[0]["content"] == "0123456789" # [0:10] + + assert chunks[1]["tokens"] == 10 + assert chunks[1]["content"] == "789abcdefg" # [7:17] - overlaps with "789" + + assert chunks[2]["tokens"] == 10 + assert chunks[2]["content"] == "efghijklmn" # [14:24] - overlaps with "efg" + + assert chunks[3]["tokens"] == 4 + assert chunks[3]["content"] == "lmno" # [21:25] - overlaps with "lmn" + + +@pytest.mark.offline +def test_split_multiple_chunks_with_mixed_sizes(): + """Test splitting text with multiple chunks of different sizes.""" + tokenizer = make_tokenizer() + # "small\n\nlarge_chunk_here\n\nmedium" + # small: 5 tokens, large_chunk_here: 16 tokens, medium: 6 tokens + content = "small\n\n" + "a" * 16 + "\n\nmedium" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=2, + ) + + # First chunk "small" should be kept as is (5 tokens) + # Second chunk (16 tokens) should be split into 2 chunks + # Third chunk "medium" should be kept as is (6 tokens) + assert len(chunks) == 4 + assert chunks[0]["content"] == "small" + assert chunks[0]["tokens"] == 5 + + +@pytest.mark.offline +def test_split_exact_boundary(): + """Test splitting at exact chunk boundaries.""" + tokenizer = make_tokenizer() + # Exactly 20 chars, should split into 2 chunks of 10 + content = "a" * 20 + + chunks = chunking_by_token_size( + tokenizer, + content, + 
split_by_character="\n\n", + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=0, + ) + + assert len(chunks) == 2 + assert chunks[0]["tokens"] == 10 + assert chunks[1]["tokens"] == 10 + + +@pytest.mark.offline +def test_split_very_large_text(): + """Test splitting very large text into multiple chunks.""" + tokenizer = make_tokenizer() + # 100 chars should create 10 chunks with chunk_size=10, overlap=0 + content = "a" * 100 + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=0, + ) + + assert len(chunks) == 10 + assert all(chunk["tokens"] == 10 for chunk in chunks) + + +# ============================================================================ +# Edge Cases +# ============================================================================ + + +@pytest.mark.offline +def test_empty_content(): + """Test chunking with empty content.""" + tokenizer = make_tokenizer() + + chunks = chunking_by_token_size( + tokenizer, + "", + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + assert len(chunks) == 1 + assert chunks[0]["content"] == "" + assert chunks[0]["tokens"] == 0 + + +@pytest.mark.offline +def test_single_character(): + """Test chunking with single character.""" + tokenizer = make_tokenizer() + + chunks = chunking_by_token_size( + tokenizer, + "a", + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + assert len(chunks) == 1 + assert chunks[0]["content"] == "a" + assert chunks[0]["tokens"] == 1 + + +@pytest.mark.offline +def test_no_delimiter_in_content(): + """Test chunking when content has no delimiter.""" + tokenizer = make_tokenizer() + content = "a" * 30 + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", # Delimiter not in content + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=0, + ) + + # Should still split based on token size + assert len(chunks) == 3 + assert all(chunk["tokens"] == 10 for chunk in chunks) + + +@pytest.mark.offline +def test_no_split_character(): + """Test chunking without split_by_character (None).""" + tokenizer = make_tokenizer() + content = "a" * 30 + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character=None, + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=0, + ) + + # Should split based purely on token size + assert len(chunks) == 3 + assert all(chunk["tokens"] == 10 for chunk in chunks) + + +# ============================================================================ +# Parameter Combinations +# ============================================================================ + + +@pytest.mark.offline +def test_different_delimiter_newline(): + """Test with single newline delimiter.""" + tokenizer = make_tokenizer() + content = "alpha\nbeta\ngamma" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + assert len(chunks) == 3 + assert [c["content"] for c in chunks] == ["alpha", "beta", "gamma"] + + +@pytest.mark.offline +def test_delimiter_based_splitting_verification(): + """ + Verify that chunks are actually split at delimiter positions. + + This test ensures split_by_character truly splits at the delimiter, + not at arbitrary positions. 
+    """
+    tokenizer = make_tokenizer()
+
+    # Content with clear delimiter boundaries
+    content = "part1||part2||part3||part4"
+
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character="||",
+        split_by_character_only=True,
+        chunk_token_size=20,
+    )
+
+    # Should split exactly at || delimiters
+    assert len(chunks) == 4
+    assert chunks[0]["content"] == "part1"
+    assert chunks[1]["content"] == "part2"
+    assert chunks[2]["content"] == "part3"
+    assert chunks[3]["content"] == "part4"
+
+    # Verify delimiter is not included in chunks
+    for chunk in chunks:
+        assert "||" not in chunk["content"]
+
+
+@pytest.mark.offline
+def test_multi_character_delimiter_splitting():
+    """
+    Verify that multi-character delimiters are correctly recognized and not partially matched.
+
+    Tests various multi-character delimiter scenarios to ensure the entire delimiter
+    sequence is used for splitting, not individual characters.
+    """
+    tokenizer = make_tokenizer()
+
+    # Test 1: Multi-character delimiter that contains single chars also present elsewhere
+    content = "data<a>more<a>final"
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character="<a>",
+        split_by_character_only=True,
+        chunk_token_size=50,
+    )
+
+    assert len(chunks) == 3
+    assert chunks[0]["content"] == "data"
+    assert chunks[1]["content"] == "more"
+    assert chunks[2]["content"] == "final"
+    # Verify full delimiter is not in chunks, not just parts
+    for chunk in chunks:
+        assert "<a>" not in chunk["content"]
+
+    # Test 2: Delimiter appears in middle of content
+    content = "first><second><third"
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character="><",
+        split_by_character_only=True,
+        chunk_token_size=50,
+    )
+
+    # Split at the "><" delimiter
+    assert len(chunks) == 3
+    assert chunks[0]["content"] == "first"
+    assert chunks[1]["content"] == "second"
+    assert chunks[2]["content"] == "third"
+
+    # Test 3: Five-character bracketed delimiter
+    content = "section1[***]section2[***]section3"
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character="[***]",
+        split_by_character_only=True,
+        chunk_token_size=50,
+    )
+
+    assert len(chunks) == 3
+    assert chunks[0]["content"] == "section1"
+    assert chunks[1]["content"] == "section2"
+    assert chunks[2]["content"] == "section3"
+
+    # Test 4: Delimiter with special regex characters (should be treated literally)
+    content = "partA...partB...partC"
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character="...",
+        split_by_character_only=True,
+        chunk_token_size=50,
+    )
+
+    assert len(chunks) == 3
+    assert chunks[0]["content"] == "partA"
+    assert chunks[1]["content"] == "partB"
+    assert chunks[2]["content"] == "partC"
+
+
+@pytest.mark.offline
+def test_delimiter_partial_match_not_split():
+    """
+    Verify that partial matches of multi-character delimiters don't cause splits.
+
+    Only the complete delimiter sequence should trigger a split.
+ """ + tokenizer = make_tokenizer() + + # Content contains "||" delimiter but also contains single "|" + content = "data|single||data|with|pipes||final" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="||", # Only split on double pipe + split_by_character_only=True, + chunk_token_size=50, + ) + + # Should split only at "||", not at single "|" + assert len(chunks) == 3 + assert chunks[0]["content"] == "data|single" + assert chunks[1]["content"] == "data|with|pipes" + assert chunks[2]["content"] == "final" + + # Single "|" should remain in content, but not double "||" + assert "|" in chunks[0]["content"] + assert "|" in chunks[1]["content"] + assert "||" not in chunks[0]["content"] + assert "||" not in chunks[1]["content"] + + +@pytest.mark.offline +def test_no_delimiter_forces_token_based_split(): + """ + Verify that when split_by_character doesn't appear in content, + chunking falls back to token-based splitting. + """ + tokenizer = make_tokenizer() + + # Content without the specified delimiter + content = "0123456789abcdefghijklmnop" # 26 chars, no "\n\n" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", # Delimiter not in content + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=0, + ) + + # Should fall back to token-based splitting + assert len(chunks) == 3 + assert chunks[0]["content"] == "0123456789" # [0:10] + assert chunks[1]["content"] == "abcdefghij" # [10:20] + assert chunks[2]["content"] == "klmnop" # [20:26] + + # Verify it didn't somehow split at the delimiter that doesn't exist + for chunk in chunks: + assert "\n\n" not in chunk["content"] + + +@pytest.mark.offline +def test_delimiter_at_exact_chunk_boundary(): + """ + Verify correct behavior when delimiter appears exactly at chunk token limit. + """ + tokenizer = make_tokenizer() + + # "segment1\n\nsegment2" where each segment is within limit + content = "12345\n\nabcde" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=10, + ) + + # Should split at delimiter, not at token count + assert len(chunks) == 2 + assert chunks[0]["content"] == "12345" + assert chunks[1]["content"] == "abcde" + + +@pytest.mark.offline +def test_different_delimiter_comma(): + """Test with comma delimiter.""" + tokenizer = make_tokenizer() + content = "one,two,three" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character=",", + split_by_character_only=True, + chunk_token_size=10, + ) + + assert len(chunks) == 3 + assert [c["content"] for c in chunks] == ["one", "two", "three"] + + +@pytest.mark.offline +def test_zero_overlap(): + """Test with zero overlap (no overlap).""" + tokenizer = make_tokenizer() + content = "a" * 20 + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character=None, + split_by_character_only=False, + chunk_token_size=10, + chunk_overlap_token_size=0, + ) + + # Should create exactly 2 chunks with no overlap + assert len(chunks) == 2 + assert chunks[0]["tokens"] == 10 + assert chunks[1]["tokens"] == 10 + + +@pytest.mark.offline +def test_large_overlap(): + """ + Test with overlap close to chunk size using distinctive content. + + Large overlap (9 out of 10) means step size is only 1, creating many overlapping chunks. + Distinctive characters ensure each chunk has correct positioning. 
+    """
+    tokenizer = make_tokenizer()
+    # Use distinctive characters to verify exact positions
+    content = "0123456789abcdefghijklmnopqrst"  # 30 chars
+
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character=None,
+        split_by_character_only=False,
+        chunk_token_size=10,
+        chunk_overlap_token_size=9,
+    )
+
+    # With overlap=9, the step size is 10 - 9 = 1, and the chunking loop is
+    # "for start in range(0, len(tokens), step)", so a chunk starts at every
+    # token position 0..29 of the 30-token string: 30 chunks in total.
+    # Chunks starting after position 20 are progressively shorter partials.
+    assert len(chunks) == 30
+
+    # Verify first few chunks have correct content with proper overlap
+    assert chunks[0]["content"] == "0123456789"  # [0:10]
+    assert (
+        chunks[1]["content"] == "123456789a"
+    )  # [1:11] - overlaps 9 chars with previous
+    assert (
+        chunks[2]["content"] == "23456789ab"
+    )  # [2:12] - overlaps 9 chars with previous
+    assert chunks[3]["content"] == "3456789abc"  # [3:13]
+
+    # Verify last chunk
+    assert chunks[-1]["content"] == "t"  # [29:30] - last char only
+
+
+# ============================================================================
+# Chunk Order Index Tests
+# ============================================================================
+
+
+@pytest.mark.offline
+def test_chunk_order_index_simple():
+    """Test that chunk_order_index is correctly assigned."""
+    tokenizer = make_tokenizer()
+    content = "a\n\nb\n\nc"
+
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character="\n\n",
+        split_by_character_only=True,
+        chunk_token_size=10,
+    )
+
+    assert len(chunks) == 3
+    assert chunks[0]["chunk_order_index"] == 0
+    assert chunks[1]["chunk_order_index"] == 1
+    assert chunks[2]["chunk_order_index"] == 2
+
+
+@pytest.mark.offline
+def test_chunk_order_index_with_splitting():
+    """Test chunk_order_index with recursive splitting."""
+    tokenizer = make_tokenizer()
+    content = "a" * 30
+
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character=None,
+        split_by_character_only=False,
+        chunk_token_size=10,
+        chunk_overlap_token_size=0,
+    )
+
+    assert len(chunks) == 3
+    assert chunks[0]["chunk_order_index"] == 0
+    assert chunks[1]["chunk_order_index"] == 1
+    assert chunks[2]["chunk_order_index"] == 2
+
+
+# ============================================================================
+# Integration Tests
+# ============================================================================
+
+
+@pytest.mark.offline
+def test_mixed_size_chunks_no_error():
+    """Test that mixed size chunks work without error in recursive mode."""
+    tokenizer = make_tokenizer()
+    # Mix of small and large chunks
+    content = "small\n\n" + "a" * 50 + "\n\nmedium"
+
+    chunks = chunking_by_token_size(
+        tokenizer,
+        content,
+        split_by_character="\n\n",
+        split_by_character_only=False,
+        chunk_token_size=10,
+        chunk_overlap_token_size=2,
+    )
+
+    # Should handle all chunks without error
+    assert len(chunks) > 0
+    # Small chunk should remain intact
+    assert chunks[0]["content"] == "small"
+    # Large chunk should be split into multiple pieces
+    assert any(chunk["content"] == "a" * 10 for chunk in chunks)
+ # Last chunk should contain "medium" + assert any("medium" in chunk["content"] for chunk in chunks) + + +@pytest.mark.offline +def test_whitespace_handling(): + """Test that whitespace is properly handled in chunk content.""" + tokenizer = make_tokenizer() + content = " alpha \n\n beta " + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=20, + ) + + # Content should be stripped + assert chunks[0]["content"] == "alpha" + assert chunks[1]["content"] == "beta" + + +@pytest.mark.offline +def test_consecutive_delimiters(): + """Test handling of consecutive delimiters.""" + tokenizer = make_tokenizer() + content = "alpha\n\n\n\nbeta" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=20, + ) + + # Should split on delimiter and include empty chunks + assert len(chunks) >= 2 + assert "alpha" in [c["content"] for c in chunks] + assert "beta" in [c["content"] for c in chunks] + + +# ============================================================================ +# Token vs Character Counting Tests (Multi-Token Characters) +# ============================================================================ + + +@pytest.mark.offline +def test_token_counting_not_character_counting(): + """ + Verify chunking uses token count, not character count. + + With MultiTokenCharacterTokenizer: + - "aXa" = 3 chars but 4 tokens (a=1, X=2, a=1) + + This test would PASS if code incorrectly used character count (3 <= 3) + but correctly FAILS because token count (4 > 3). + """ + tokenizer = make_multi_token_tokenizer() + + # "aXa" = 3 characters, 4 tokens + content = "aXa" + + with pytest.raises(ChunkTokenLimitExceededError) as excinfo: + chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=3, # 3 token limit + ) + + err = excinfo.value + assert err.chunk_tokens == 4 # Should be 4 tokens, not 3 characters + assert err.chunk_token_limit == 3 + + +@pytest.mark.offline +def test_token_limit_with_punctuation(): + """ + Test that punctuation token expansion is handled correctly. + + "Hi!" = 3 chars but 6 tokens (H=2, i=1, !=3) + """ + tokenizer = make_multi_token_tokenizer() + + # "Hi!" = 3 characters, 6 tokens (H=2, i=1, !=3) + content = "Hi!" + + with pytest.raises(ChunkTokenLimitExceededError) as excinfo: + chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=4, + ) + + err = excinfo.value + assert err.chunk_tokens == 6 + assert err.chunk_token_limit == 4 + + +@pytest.mark.offline +def test_multi_token_within_limit(): + """Test that multi-token characters work when within limit.""" + tokenizer = make_multi_token_tokenizer() + + # "Hi" = 2 chars, 3 tokens (H=2, i=1) + content = "Hi" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=5, + ) + + assert len(chunks) == 1 + assert chunks[0]["tokens"] == 3 + assert chunks[0]["content"] == "Hi" + + +@pytest.mark.offline +def test_recursive_split_with_multi_token_chars(): + """ + Test recursive splitting respects token boundaries, not character boundaries. + + "AAAAA" = 5 chars but 10 tokens (each A = 2 tokens) + With chunk_size=6, should split at token positions, not character positions. 
+ """ + tokenizer = make_multi_token_tokenizer() + + # "AAAAA" = 5 characters, 10 tokens + content = "AAAAA" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=False, + chunk_token_size=6, + chunk_overlap_token_size=0, + ) + + # Should split into: [0:6]=3 chars, [6:10]=2 chars + # Not [0:3]=6 tokens, [3:5]=4 tokens (character-based would be wrong) + assert len(chunks) == 2 + assert chunks[0]["tokens"] == 6 + assert chunks[1]["tokens"] == 4 + + +@pytest.mark.offline +def test_overlap_uses_token_count(): + """ + Verify overlap calculation uses token count, not character count. + + "aAaAa" = 5 chars, 7 tokens (a=1, A=2, a=1, A=2, a=1) + """ + tokenizer = make_multi_token_tokenizer() + + # "aAaAa" = 5 characters, 7 tokens + content = "aAaAa" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=False, + chunk_token_size=4, + chunk_overlap_token_size=2, + ) + + # Chunks start at token positions: 0, 2, 4, 6 + # [0:4]=2 chars, [2:6]=2.5 chars, [4:7]=1.5 chars + assert len(chunks) == 4 + assert chunks[0]["tokens"] == 4 + assert chunks[1]["tokens"] == 4 + assert chunks[2]["tokens"] == 3 + assert chunks[3]["tokens"] == 1 + + +@pytest.mark.offline +def test_mixed_multi_token_content(): + """Test chunking with mixed single and multi-token characters.""" + tokenizer = make_multi_token_tokenizer() + + # "hello\n\nWORLD!" = 12 chars + # hello = 5 tokens, WORLD = 10 tokens (5 chars × 2), ! = 3 tokens + # Total = 18 tokens + content = "hello\n\nWORLD!" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=20, + ) + + assert len(chunks) == 2 + assert chunks[0]["content"] == "hello" + assert chunks[0]["tokens"] == 5 + assert chunks[1]["content"] == "WORLD!" + assert chunks[1]["tokens"] == 13 # 10 + 3 + + +@pytest.mark.offline +def test_exact_token_boundary_multi_token(): + """Test splitting exactly at token limit with multi-token characters.""" + tokenizer = make_multi_token_tokenizer() + + # "AAA" = 3 chars, 6 tokens (each A = 2 tokens) + content = "AAA" + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character="\n\n", + split_by_character_only=True, + chunk_token_size=6, + ) + + assert len(chunks) == 1 + assert chunks[0]["tokens"] == 6 + assert chunks[0]["content"] == "AAA" + + +@pytest.mark.offline +def test_multi_token_overlap_with_distinctive_content(): + """ + Verify overlap works correctly with multi-token characters using distinctive content. + + With non-uniform tokenization, overlap must be calculated in token space, not character space. + Distinctive characters ensure we catch any misalignment. + + Content: "abcABCdef" + - "abc" = 3 tokens (1+1+1) + - "ABC" = 6 tokens (2+2+2) + - "def" = 3 tokens (1+1+1) + - Total = 12 tokens + """ + tokenizer = make_multi_token_tokenizer() + + # Distinctive content with mixed single and multi-token chars + content = "abcABCdef" # 9 chars, 12 tokens + + chunks = chunking_by_token_size( + tokenizer, + content, + split_by_character=None, + split_by_character_only=False, + chunk_token_size=6, + chunk_overlap_token_size=2, + ) + + # With chunk_size=6, overlap=2, step=4 + # Chunks start at token positions: 0, 4, 8 + # Chunk 0: tokens [0:6] = "abcA" (tokens: a=1, b=1, c=1, A=2, total=5... 
wait) + # Let me recalculate: + # "a"=1, "b"=1, "c"=1, "A"=2, "B"=2, "C"=2, "d"=1, "e"=1, "f"=1 + # Token positions: a=0, b=1, c=2, A=3-4, B=5-6, C=7-8, d=9, e=10, f=11 + # Chunk 0 [0:6]: covers "abc" (tokens 0-2) + partial "ABC" (tokens 3-5, which is "AB") + # But we need to figure out what characters that maps to... + # + # Actually, let's think in terms of token slicing: + # tokens = [a, b, c, A1, A2, B1, B2, C1, C2, d, e, f] + # Chunk 0 [0:6]: [a, b, c, A1, A2, B1] - decode to "abcAB" + # Chunk 1 [4:10]: [A2, B1, B2, C1, C2, d] - decode to "ABCd" + # Chunk 2 [8:12]: [C2, d, e, f] - decode to... this is problematic + # + # The issue is that multi-token characters might get split across chunks. + # Let me verify what the actual chunking does... + + assert len(chunks) == 3 + + # Just verify token counts are correct - content may vary due to character splitting + assert chunks[0]["tokens"] == 6 + assert chunks[1]["tokens"] == 6 + assert chunks[2]["tokens"] == 4 + + +@pytest.mark.offline +def test_decode_preserves_content(): + """Verify that decode correctly reconstructs original content.""" + tokenizer = make_multi_token_tokenizer() + + test_strings = [ + "Hello", + "WORLD", + "Test!", + "Mixed?Case.", + "ABC123xyz", + ] + + for original in test_strings: + tokens = tokenizer.encode(original) + decoded = tokenizer.decode(tokens) + assert decoded == original, f"Failed to decode: {original}"
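
Usage note (illustrative only, not part of this diff): a minimal sketch of how calling code might react to the new ChunkTokenLimitExceededError when strict character-only splitting is requested. The helper name safe_chunk_document, its defaults, and the fall-back-to-recursive-splitting strategy are assumptions for illustration, not part of LightRAG's API; the keyword arguments mirror those exercised in the tests above.

from lightrag.exceptions import ChunkTokenLimitExceededError
from lightrag.operate import chunking_by_token_size


def safe_chunk_document(tokenizer, text, chunk_token_size=1024):
    """Try strict character-only splitting; fall back to recursive splitting."""
    try:
        return chunking_by_token_size(
            tokenizer,
            text,
            split_by_character="\n\n",
            split_by_character_only=True,
            chunk_token_size=chunk_token_size,
        )
    except ChunkTokenLimitExceededError as err:
        # The error carries chunk_tokens, chunk_token_limit, and an optional
        # 80-character chunk_preview describing the oversized piece.
        print(
            f"Oversized chunk ({err.chunk_tokens} > {err.chunk_token_limit}); "
            "falling back to recursive token-based splitting"
        )
        return chunking_by_token_size(
            tokenizer,
            text,
            split_by_character="\n\n",
            split_by_character_only=False,
            chunk_token_size=chunk_token_size,
        )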