Fix chunk_by_word_test

2024-11-13 11:45:56 +01:00 · 2024-11-13 11:45:56 +01:00 · 830c6710e0
commit 830c6710e0
parent 98cbaaff68
1 changed file with 6 additions and 1 deletion
--- a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
@@ -9,12 +9,17 @@ from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS
    INPUT_TEXTS["python_code"],
    INPUT_TEXTS["chinese_text"]
 ])
 def test_chunk_by_word_isomorphism(input_text):
    """Check that joining the first element of every chunk reproduces the input text exactly."""
    # Only element 0 of each item yielded by chunk_by_word is used here.
    pieces = [item[0] for item in chunk_by_word(input_text)]
    reconstructed_text = "".join(pieces)
    assert reconstructed_text == input_text, f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
@pytest.mark.parametrize("input_text", [
    INPUT_TEXTS["english_text"],
    INPUT_TEXTS["english_lists"],
    INPUT_TEXTS["python_code"],
    INPUT_TEXTS["chinese_text"]
 ])
 def test_chunk_by_word_splits(input_text):
    chunks = np.array(list(chunk_by_word(input_text)))
    space_test = np.array([" " not in chunk[0].strip() for chunk in chunks])