Autoformat chunking tests

2024-11-13 12:13:12 +01:00 · 2024-11-13 12:13:12 +01:00 · 92a66dddb9
commit 92a66dddb9
parent ce498d97dd
4 changed files with 58 additions and 39 deletions
--- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test2.py
@@ -1,17 +1,22 @@
-import pytest
 import numpy as np
+import pytest
+
 from cognee.tasks.chunks import chunk_by_paragraph
 from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS

-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])

+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_paragraph_isomorphism(input_text):
     chunks = chunk_by_paragraph(input_text)
     reconstructed_text = "".join([chunk["text"] for chunk in chunks])
-    assert reconstructed_text == input_text, f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
-
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
--- a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
@@ -1,17 +1,22 @@
-import pytest
 import numpy as np
+import pytest
+
 from cognee.tasks.chunks import chunk_by_sentence
 from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS

-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])

+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_sentence_isomorphism(input_text):
     chunks = chunk_by_sentence(input_text)
     reconstructed_text = "".join([chunk[2] for chunk in chunks])
-    assert reconstructed_text == input_text, f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
-
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
--- a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
@@ -1,31 +1,40 @@
-import pytest
 import numpy as np
+import pytest
+
 from cognee.tasks.chunks import chunk_by_word
 from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS

-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])
+
+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_word_isomorphism(input_text):
     chunks = chunk_by_word(input_text)
     reconstructed_text = "".join([chunk[0] for chunk in chunks])
-    assert reconstructed_text == input_text, f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"

-@pytest.mark.parametrize("input_text", [
-    INPUT_TEXTS["english_text"],
-    INPUT_TEXTS["english_lists"],
-    INPUT_TEXTS["python_code"],
-    INPUT_TEXTS["chinese_text"]
-])
+
+@pytest.mark.parametrize(
+    "input_text",
+    [
+        INPUT_TEXTS["english_text"],
+        INPUT_TEXTS["english_lists"],
+        INPUT_TEXTS["python_code"],
+        INPUT_TEXTS["chinese_text"],
+    ],
+)
 def test_chunk_by_word_splits(input_text):
     chunks = np.array(list(chunk_by_word(input_text)))
     space_test = np.array([" " not in chunk[0].strip() for chunk in chunks])

-    assert np.all(space_test), f"These chunks contain spaces within them: {chunks[space_test == False]}"
-
-
-
-
+    assert np.all(
+        space_test
+    ), f"These chunks contain spaces within them: {chunks[space_test == False]}"
--- a/cognee/tests/unit/processing/chunks/test_input.py
+++ b/cognee/tests/unit/processing/chunks/test_input.py
@@ -271,5 +271,5 @@ What feign'd submission swore: ease would recant
 Vows made in pain, as violent and void.
 For never can true reconcilement grow
 Where wounds of deadly hate have peirc'd so deep:
-Which would but lead me to a worse relapse [ 100 ]"""
-}
+Which would but lead me to a worse relapse [ 100 ]""",
+}