diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py
index 2cb95f416..16767f736 100644
--- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py
@@ -1,17 +1,22 @@
-import pytest
 import numpy as np
+import pytest
+
 from cognee.tasks.chunks import chunk_by_paragraph
 from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS
 
 
-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])
+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_paragraph_isomorphism(input_text):
     chunks = chunk_by_paragraph(input_text)
     reconstructed_text = "".join([chunk["text"] for chunk in chunks])
-    assert reconstructed_text == input_text, f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
-
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
diff --git a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
index a21a3e9f9..fcab02e03 100644
--- a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
@@ -1,17 +1,22 @@
-import pytest
 import numpy as np
+import pytest
+
 from cognee.tasks.chunks import chunk_by_sentence
 from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS
 
 
-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])
+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_sentence_isomorphism(input_text):
     chunks = chunk_by_sentence(input_text)
     reconstructed_text = "".join([chunk[2] for chunk in chunks])
-    assert reconstructed_text == input_text, f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
-
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
diff --git a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
index 38e5d9b5a..42523c106 100644
--- a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
@@ -1,31 +1,40 @@
-import pytest
 import numpy as np
+import pytest
+
 from cognee.tasks.chunks import chunk_by_word
 from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS
 
-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])
+
+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_word_isomorphism(input_text):
     chunks = chunk_by_word(input_text)
     reconstructed_text = "".join([chunk[0] for chunk in chunks])
-    assert reconstructed_text == input_text, f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
 
 
-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])
+
+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_word_splits(input_text):
     chunks = np.array(list(chunk_by_word(input_text)))
     space_test = np.array([" " not in chunk[0].strip() for chunk in chunks])
-    assert np.all(space_test), f"These chunks contain spaces within them: {chunks[space_test == False]}"
-
-
-
-
+    assert np.all(
+        space_test
+    ), f"These chunks contain spaces within them: {chunks[space_test == False]}"
diff --git a/cognee/tests/unit/processing/chunks/test_input.py b/cognee/tests/unit/processing/chunks/test_input.py
index ad6603d9d..b7a57b75a 100644
--- a/cognee/tests/unit/processing/chunks/test_input.py
+++ b/cognee/tests/unit/processing/chunks/test_input.py
@@ -271,5 +271,5 @@ What feign'd submission swore: ease would recant
 Vows made in pain, as violent and void.
 For never can true reconcilement grow
 Where wounds of deadly hate have peirc'd so deep:
-Which would but lead me to a worse relapse [ 100 ]"""
-}
\ No newline at end of file
+Which would but lead me to a worse relapse [ 100 ]""",
+}