From 82606474971f6b776f157c7d55ae8ae00d677272 Mon Sep 17 00:00:00 2001 From: Leon Luithlen Date: Thu, 14 Nov 2024 12:42:10 +0100 Subject: [PATCH] Add AudioDocument and ImageDocument tests --- .../documents/AudioDocument_test.py | 46 +++++++++++++++++++ .../documents/ImageDocument_test.py | 34 ++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 cognee/tests/integration/documents/AudioDocument_test.py create mode 100644 cognee/tests/integration/documents/ImageDocument_test.py diff --git a/cognee/tests/integration/documents/AudioDocument_test.py b/cognee/tests/integration/documents/AudioDocument_test.py new file mode 100644 index 000000000..b20124456 --- /dev/null +++ b/cognee/tests/integration/documents/AudioDocument_test.py @@ -0,0 +1,46 @@ +import uuid +from unittest.mock import patch + +from cognee.modules.data.processing.document_types.AudioDocument import AudioDocument + +GROUND_TRUTH = [ + {"word_count": 60, "len_text": 318, "cut_type": "sentence_end"}, + {"word_count": 64, "len_text": 358, "cut_type": "sentence_end"}, + {"word_count": 56, "len_text": 255, "cut_type": "sentence_cut"}, +] + +TEST_TEXT = """ +"Mike, we need to talk about the payment processing service." +"Good timing. The board wants one-click checkout by end of quarter." +"That's exactly the problem. The service is held together with duct tape. One wrong move and—" +"Sarah, we've been over this. The market won't wait." +"And neither will a system collapse! The technical debt is crushing us. Every new feature takes twice as long as it should." +"Then work twice as hard. Our competitors—" +"Our competitors will laugh when our whole system goes down during Black Friday! We're talking about financial transactions here, not some blog comments section." +"Write up your concerns in a doc. Right now, we ship one-click." +"Then you'll ship it without me. I won't stake my reputation on a house of cards." +"Are you threatening to quit?" +"No, I'm threatening to be right. And when it breaks, I want it in writing that you chose this." +"The feature ships, Sarah. That's final." +""" + + +def test_AudioDocument(): + + document = AudioDocument( + id=uuid.uuid4(), name="audio-dummy-test", raw_data_location="" + ) + with patch.object(AudioDocument, "create_transcript", return_value=TEST_TEXT): + + for ground_truth, paragraph_data in zip( + GROUND_TRUTH, document.read(chunk_size=64) + ): + assert ( + ground_truth["word_count"] == paragraph_data.word_count + ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + assert ground_truth["len_text"] == len( + paragraph_data.text + ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + assert ( + ground_truth["cut_type"] == paragraph_data.cut_type + ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' diff --git a/cognee/tests/integration/documents/ImageDocument_test.py b/cognee/tests/integration/documents/ImageDocument_test.py new file mode 100644 index 000000000..d34127eb3 --- /dev/null +++ b/cognee/tests/integration/documents/ImageDocument_test.py @@ -0,0 +1,34 @@ +import uuid +from unittest.mock import patch + +from cognee.modules.data.processing.document_types.ImageDocument import ImageDocument + +GROUND_TRUTH = [ + {"word_count": 51, "len_text": 298, "cut_type": "sentence_end"}, + {"word_count": 63, "len_text": 369, "cut_type": "sentence_end"}, + {"word_count": 44, "len_text": 294, "cut_type": "sentence_end"}, +] + +TEST_TEXT = """A dramatic confrontation unfolds as a red fox and river otter engage in an energetic wrestling match at the water's edge. The fox, teeth bared in a playful snarl, has its front paws locked with the otter's flippers as they roll through the shallow stream, sending water spraying in all directions. The otter, displaying its surprising agility on land, counters by twisting its sleek body and attempting to wrap itself around the fox's shoulders, its whiskered face inches from the fox's muzzle. +The commotion has attracted an audience: a murder of crows has gathered in the low branches, their harsh calls adding to the chaos as they hop excitedly from limb to limb. One particularly bold crow dive-bombs the wrestling pair, causing both animals to momentarily freeze mid-tussle, creating a perfect snapshot of suspended action—the fox's fur dripping wet, the otter's body coiled like a spring, and the crow's wings spread wide against the golden morning light.""" + + +def test_ImageDocument(): + + document = ImageDocument( + id=uuid.uuid4(), name="image-dummy-test", raw_data_location="" + ) + with patch.object(ImageDocument, "transcribe_image", return_value=TEST_TEXT): + + for ground_truth, paragraph_data in zip( + GROUND_TRUTH, document.read(chunk_size=64) + ): + assert ( + ground_truth["word_count"] == paragraph_data.word_count + ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + assert ground_truth["len_text"] == len( + paragraph_data.text + ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + assert ( + ground_truth["cut_type"] == paragraph_data.cut_type + ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'