diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py index 16767f736..d846fdfa2 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py @@ -6,16 +6,36 @@ from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS @pytest.mark.parametrize( - "input_text", + "input_text,paragraph_length,batch_paragraphs", [ - INPUT_TEXTS["english_text"], - INPUT_TEXTS["english_lists"], - INPUT_TEXTS["python_code"], - INPUT_TEXTS["chinese_text"], + (INPUT_TEXTS["english_text"], 64, True), + (INPUT_TEXTS["english_text"], 64, False), + (INPUT_TEXTS["english_text"], 256, True), + (INPUT_TEXTS["english_text"], 256, False), + (INPUT_TEXTS["english_text"], 1024, True), + (INPUT_TEXTS["english_text"], 1024, False), + (INPUT_TEXTS["english_lists"], 64, True), + (INPUT_TEXTS["english_lists"], 64, False), + (INPUT_TEXTS["english_lists"], 256, True), + (INPUT_TEXTS["english_lists"], 256, False), + (INPUT_TEXTS["english_lists"], 1024, True), + (INPUT_TEXTS["english_lists"], 1024, False), + (INPUT_TEXTS["python_code"], 64, True), + (INPUT_TEXTS["python_code"], 64, False), + (INPUT_TEXTS["python_code"], 256, True), + (INPUT_TEXTS["python_code"], 256, False), + (INPUT_TEXTS["python_code"], 1024, True), + (INPUT_TEXTS["python_code"], 1024, False), + (INPUT_TEXTS["chinese_text"], 64, True), + (INPUT_TEXTS["chinese_text"], 64, False), + (INPUT_TEXTS["chinese_text"], 256, True), + (INPUT_TEXTS["chinese_text"], 256, False), + (INPUT_TEXTS["chinese_text"], 1024, True), + (INPUT_TEXTS["chinese_text"], 1024, False), ], ) -def test_chunk_by_paragraph_isomorphism(input_text): - chunks = chunk_by_paragraph(input_text) +def test_chunk_by_paragraph_isomorphism(input_text, paragraph_length, batch_paragraphs): + chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs) reconstructed_text = "".join([chunk["text"] for chunk in chunks]) assert ( reconstructed_text == input_text