Adapt chunk_by_paragraph test parametrization
This commit is contained in:
parent
92a66dddb9
commit
ef7a19043d
1 changed file with 27 additions and 7 deletions
|
|
@@ -6,16 +6,36 @@ from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input_text",
|
||||
"input_text,paragraph_length,batch_paragraphs",
|
||||
[
|
||||
INPUT_TEXTS["english_text"],
|
||||
INPUT_TEXTS["english_lists"],
|
||||
INPUT_TEXTS["python_code"],
|
||||
INPUT_TEXTS["chinese_text"],
|
||||
(INPUT_TEXTS["english_text"], 64, True),
|
||||
(INPUT_TEXTS["english_text"], 64, False),
|
||||
(INPUT_TEXTS["english_text"], 256, True),
|
||||
(INPUT_TEXTS["english_text"], 256, False),
|
||||
(INPUT_TEXTS["english_text"], 1024, True),
|
||||
(INPUT_TEXTS["english_text"], 1024, False),
|
||||
(INPUT_TEXTS["english_lists"], 64, True),
|
||||
(INPUT_TEXTS["english_lists"], 64, False),
|
||||
(INPUT_TEXTS["english_lists"], 256, True),
|
||||
(INPUT_TEXTS["english_lists"], 256, False),
|
||||
(INPUT_TEXTS["english_lists"], 1024, True),
|
||||
(INPUT_TEXTS["english_lists"], 1024, False),
|
||||
(INPUT_TEXTS["python_code"], 64, True),
|
||||
(INPUT_TEXTS["python_code"], 64, False),
|
||||
(INPUT_TEXTS["python_code"], 256, True),
|
||||
(INPUT_TEXTS["python_code"], 256, False),
|
||||
(INPUT_TEXTS["python_code"], 1024, True),
|
||||
(INPUT_TEXTS["python_code"], 1024, False),
|
||||
(INPUT_TEXTS["chinese_text"], 64, True),
|
||||
(INPUT_TEXTS["chinese_text"], 64, False),
|
||||
(INPUT_TEXTS["chinese_text"], 256, True),
|
||||
(INPUT_TEXTS["chinese_text"], 256, False),
|
||||
(INPUT_TEXTS["chinese_text"], 1024, True),
|
||||
(INPUT_TEXTS["chinese_text"], 1024, False),
|
||||
],
|
||||
)
|
||||
def test_chunk_by_paragraph_isomorphism(input_text):
|
||||
chunks = chunk_by_paragraph(input_text)
|
||||
def test_chunk_by_paragraph_isomorphism(input_text, paragraph_length, batch_paragraphs):
|
||||
chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs)
|
||||
reconstructed_text = "".join([chunk["text"] for chunk in chunks])
|
||||
assert (
|
||||
reconstructed_text == input_text
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue