Add AudioDocument and ImageDocument tests
This commit is contained in:
parent
f87fd12e9b
commit
8260647497
2 changed files with 80 additions and 0 deletions
46
cognee/tests/integration/documents/AudioDocument_test.py
Normal file
46
cognee/tests/integration/documents/AudioDocument_test.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
import uuid
|
||||
from unittest.mock import patch
|
||||
|
||||
from cognee.modules.data.processing.document_types.AudioDocument import AudioDocument
|
||||
|
||||
GROUND_TRUTH = [
|
||||
{"word_count": 60, "len_text": 318, "cut_type": "sentence_end"},
|
||||
{"word_count": 64, "len_text": 358, "cut_type": "sentence_end"},
|
||||
{"word_count": 56, "len_text": 255, "cut_type": "sentence_cut"},
|
||||
]
|
||||
|
||||
TEST_TEXT = """
|
||||
"Mike, we need to talk about the payment processing service."
|
||||
"Good timing. The board wants one-click checkout by end of quarter."
|
||||
"That's exactly the problem. The service is held together with duct tape. One wrong move and—"
|
||||
"Sarah, we've been over this. The market won't wait."
|
||||
"And neither will a system collapse! The technical debt is crushing us. Every new feature takes twice as long as it should."
|
||||
"Then work twice as hard. Our competitors—"
|
||||
"Our competitors will laugh when our whole system goes down during Black Friday! We're talking about financial transactions here, not some blog comments section."
|
||||
"Write up your concerns in a doc. Right now, we ship one-click."
|
||||
"Then you'll ship it without me. I won't stake my reputation on a house of cards."
|
||||
"Are you threatening to quit?"
|
||||
"No, I'm threatening to be right. And when it breaks, I want it in writing that you chose this."
|
||||
"The feature ships, Sarah. That's final."
|
||||
"""
|
||||
|
||||
|
||||
def test_AudioDocument():
    """Check that AudioDocument.read chunks a transcript as listed in GROUND_TRUTH.

    ``AudioDocument.create_transcript`` is patched to return TEST_TEXT, so the
    text being chunked comes from this module's fixture. Each chunk produced
    by ``read(chunk_size=64)`` is compared against the expected word count,
    text length and cut type.
    """
    document = AudioDocument(
        id=uuid.uuid4(), name="audio-dummy-test", raw_data_location=""
    )

    with patch.object(AudioDocument, "create_transcript", return_value=TEST_TEXT):
        # Consume the reader while the patch is still active, in case read()
        # produces its chunks lazily.
        paragraphs = list(document.read(chunk_size=64))

    # zip() stops at the shorter input, so without this check a reader that
    # yields too few (or no) chunks would pass the test silently.
    assert len(GROUND_TRUTH) == len(
        paragraphs
    ), f"{len(GROUND_TRUTH) = } != {len(paragraphs) = }"

    for ground_truth, paragraph_data in zip(GROUND_TRUTH, paragraphs):
        assert (
            ground_truth["word_count"] == paragraph_data.word_count
        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
        assert ground_truth["len_text"] == len(
            paragraph_data.text
        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
        assert (
            ground_truth["cut_type"] == paragraph_data.cut_type
        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
|
||||
34
cognee/tests/integration/documents/ImageDocument_test.py
Normal file
34
cognee/tests/integration/documents/ImageDocument_test.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import uuid
|
||||
from unittest.mock import patch
|
||||
|
||||
from cognee.modules.data.processing.document_types.ImageDocument import ImageDocument
|
||||
|
||||
GROUND_TRUTH = [
|
||||
{"word_count": 51, "len_text": 298, "cut_type": "sentence_end"},
|
||||
{"word_count": 63, "len_text": 369, "cut_type": "sentence_end"},
|
||||
{"word_count": 44, "len_text": 294, "cut_type": "sentence_end"},
|
||||
]
|
||||
|
||||
TEST_TEXT = """A dramatic confrontation unfolds as a red fox and river otter engage in an energetic wrestling match at the water's edge. The fox, teeth bared in a playful snarl, has its front paws locked with the otter's flippers as they roll through the shallow stream, sending water spraying in all directions. The otter, displaying its surprising agility on land, counters by twisting its sleek body and attempting to wrap itself around the fox's shoulders, its whiskered face inches from the fox's muzzle.
|
||||
The commotion has attracted an audience: a murder of crows has gathered in the low branches, their harsh calls adding to the chaos as they hop excitedly from limb to limb. One particularly bold crow dive-bombs the wrestling pair, causing both animals to momentarily freeze mid-tussle, creating a perfect snapshot of suspended action—the fox's fur dripping wet, the otter's body coiled like a spring, and the crow's wings spread wide against the golden morning light."""
|
||||
|
||||
|
||||
def test_ImageDocument():
    """Check that ImageDocument.read chunks a description as listed in GROUND_TRUTH.

    ``ImageDocument.transcribe_image`` is patched to return TEST_TEXT, so the
    text being chunked comes from this module's fixture. Each chunk produced
    by ``read(chunk_size=64)`` is compared against the expected word count,
    text length and cut type.
    """
    document = ImageDocument(
        id=uuid.uuid4(), name="image-dummy-test", raw_data_location=""
    )

    with patch.object(ImageDocument, "transcribe_image", return_value=TEST_TEXT):
        # Consume the reader while the patch is still active, in case read()
        # produces its chunks lazily.
        paragraphs = list(document.read(chunk_size=64))

    # zip() stops at the shorter input, so without this check a reader that
    # yields too few (or no) chunks would pass the test silently.
    assert len(GROUND_TRUTH) == len(
        paragraphs
    ), f"{len(GROUND_TRUTH) = } != {len(paragraphs) = }"

    for ground_truth, paragraph_data in zip(GROUND_TRUTH, paragraphs):
        assert (
            ground_truth["word_count"] == paragraph_data.word_count
        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
        assert ground_truth["len_text"] == len(
            paragraph_data.text
        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
        assert (
            ground_truth["cut_type"] == paragraph_data.cut_type
        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
|
||||
Loading…
Add table
Reference in a new issue