Add AudioDocument and ImageDocument tests

This commit is contained in:
Leon Luithlen 2024-11-14 12:42:10 +01:00
parent f87fd12e9b
commit 8260647497
2 changed files with 80 additions and 0 deletions

View file

@@ -0,0 +1,46 @@
import uuid
from unittest.mock import patch
from cognee.modules.data.processing.document_types.AudioDocument import AudioDocument
# Expected metrics for each chunk, in the order the chunks are produced:
# the chunk's word_count, the length of its text, and its cut_type.
GROUND_TRUTH = [
    {"word_count": words, "len_text": chars, "cut_type": cut}
    for words, chars, cut in (
        (60, 318, "sentence_end"),
        (64, 358, "sentence_end"),
        (56, 255, "sentence_cut"),
    )
]

# Dialogue returned by the patched ``create_transcript``.  The expected values
# above were recorded against this exact text (including the leading and
# trailing newline), so it must not be reformatted.
TEST_TEXT = """
"Mike, we need to talk about the payment processing service."
"Good timing. The board wants one-click checkout by end of quarter."
"That's exactly the problem. The service is held together with duct tape. One wrong move and—"
"Sarah, we've been over this. The market won't wait."
"And neither will a system collapse! The technical debt is crushing us. Every new feature takes twice as long as it should."
"Then work twice as hard. Our competitors—"
"Our competitors will laugh when our whole system goes down during Black Friday! We're talking about financial transactions here, not some blog comments section."
"Write up your concerns in a doc. Right now, we ship one-click."
"Then you'll ship it without me. I won't stake my reputation on a house of cards."
"Are you threatening to quit?"
"No, I'm threatening to be right. And when it breaks, I want it in writing that you chose this."
"The feature ships, Sarah. That's final."
"""
def test_AudioDocument():
    """Check that ``AudioDocument.read`` chunks a transcript as recorded in GROUND_TRUTH.

    ``AudioDocument.create_transcript`` is patched to return ``TEST_TEXT``, and
    the document is built with an empty ``raw_data_location`` (presumably so
    that no real audio file or transcription service is needed -- confirm
    against ``AudioDocument``).

    The number of chunks is checked before the per-chunk comparison: ``zip``
    stops at the shorter input, so without that check the test would pass
    even if ``read`` yielded no chunks at all, or a different number of
    chunks than expected.
    """
    document = AudioDocument(
        id=uuid.uuid4(), name="audio-dummy-test", raw_data_location=""
    )

    with patch.object(AudioDocument, "create_transcript", return_value=TEST_TEXT):
        # Materialise the chunks while the patch is active; read() may be lazy.
        paragraphs = list(document.read(chunk_size=64))

        assert len(paragraphs) == len(
            GROUND_TRUTH
        ), f"expected {len(GROUND_TRUTH)} chunks, got {len(paragraphs)}"

        for ground_truth, paragraph_data in zip(GROUND_TRUTH, paragraphs):
            assert (
                ground_truth["word_count"] == paragraph_data.word_count
            ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
            assert ground_truth["len_text"] == len(
                paragraph_data.text
            ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
            assert (
                ground_truth["cut_type"] == paragraph_data.cut_type
            ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'

View file

@@ -0,0 +1,34 @@
import uuid
from unittest.mock import patch
from cognee.modules.data.processing.document_types.ImageDocument import ImageDocument
# Expected metrics for each chunk, in the order the chunks are produced:
# the chunk's word_count, the length of its text, and its cut_type.
GROUND_TRUTH = [
    {"word_count": words, "len_text": chars, "cut_type": cut}
    for words, chars, cut in (
        (51, 298, "sentence_end"),
        (63, 369, "sentence_end"),
        (44, 294, "sentence_end"),
    )
]

# Image description returned by the patched ``transcribe_image``.  The expected
# values above were recorded against this exact text, so it must not be
# reformatted.
# NOTE(review): "actionthe" looks like a lost dash ("action—the"); it is kept
# as-is because changing it would presumably shift the recorded lengths --
# confirm and re-record GROUND_TRUTH before fixing.
TEST_TEXT = """A dramatic confrontation unfolds as a red fox and river otter engage in an energetic wrestling match at the water's edge. The fox, teeth bared in a playful snarl, has its front paws locked with the otter's flippers as they roll through the shallow stream, sending water spraying in all directions. The otter, displaying its surprising agility on land, counters by twisting its sleek body and attempting to wrap itself around the fox's shoulders, its whiskered face inches from the fox's muzzle.
The commotion has attracted an audience: a murder of crows has gathered in the low branches, their harsh calls adding to the chaos as they hop excitedly from limb to limb. One particularly bold crow dive-bombs the wrestling pair, causing both animals to momentarily freeze mid-tussle, creating a perfect snapshot of suspended actionthe fox's fur dripping wet, the otter's body coiled like a spring, and the crow's wings spread wide against the golden morning light."""
def test_ImageDocument():
    """Check that ``ImageDocument.read`` chunks a description as recorded in GROUND_TRUTH.

    ``ImageDocument.transcribe_image`` is patched to return ``TEST_TEXT``, and
    the document is built with an empty ``raw_data_location`` (presumably so
    that no real image file or transcription service is needed -- confirm
    against ``ImageDocument``).

    The number of chunks is checked before the per-chunk comparison: ``zip``
    stops at the shorter input, so without that check the test would pass
    even if ``read`` yielded no chunks at all, or a different number of
    chunks than expected.
    """
    document = ImageDocument(
        id=uuid.uuid4(), name="image-dummy-test", raw_data_location=""
    )

    with patch.object(ImageDocument, "transcribe_image", return_value=TEST_TEXT):
        # Materialise the chunks while the patch is active; read() may be lazy.
        paragraphs = list(document.read(chunk_size=64))

        assert len(paragraphs) == len(
            GROUND_TRUTH
        ), f"expected {len(GROUND_TRUTH)} chunks, got {len(paragraphs)}"

        for ground_truth, paragraph_data in zip(GROUND_TRUTH, paragraphs):
            assert (
                ground_truth["word_count"] == paragraph_data.word_count
            ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
            assert ground_truth["len_text"] == len(
                paragraph_data.text
            ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
            assert (
                ground_truth["cut_type"] == paragraph_data.cut_type
            ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'