Add first three unit tests

This commit is contained in:
Leon Luithlen 2024-11-11 16:39:46 +01:00
parent cdaf63f57c
commit dce894bfd3
7 changed files with 21 additions and 14 deletions

View file

@ -1,13 +0,0 @@
import os
from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument
if __name__ == "__main__":
    # Manual smoke run: read the sample PDF that sits next to this script
    # and dump every chunk the reader produces.
    script_dir = os.path.dirname(__file__)
    sample_pdf = os.path.join(script_dir, "artificial-inteligence.pdf")

    document = PdfDocument("Test document.pdf", sample_pdf, chunking_strategy="paragraph")

    for chunk in document.get_reader().read():
        # Print the fields in the same order as before: count, text, cut type.
        for field in ("word_count", "text", "cut_type"):
            print(chunk[field])
        # Separator between chunks (print adds its own newline as well).
        print("\n")

BIN
cognee/tests/.DS_Store vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,20 @@
import os
import uuid
from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument
# Expected properties of the leading chunks produced when the sample PDF is
# read with chunk_size=1024: word count, length of the chunk text, and the
# kind of boundary the chunk was cut at.
GROUND_TRUTH = [
    {"word_count": 879, "len_text": 5622, "cut_type": "sentence_end"},
    {"word_count": 951, "len_text": 6384, "cut_type": "sentence_end"},
]


def verify_chunks(expected_chunks, chunks):
    """Compare produced chunks against their expected properties.

    Args:
        expected_chunks: Sequence of dicts with the keys ``word_count``,
            ``len_text`` and ``cut_type``.
        chunks: Iterable of objects exposing ``word_count``, ``text`` and
            ``cut_type`` attributes. Chunks beyond ``len(expected_chunks)``
            are not consumed.

    Raises:
        AssertionError: If a chunk deviates from its expected entry, or if
            fewer chunks were produced than there are expected entries.
    """
    compared = 0
    for ground_truth, paragraph_data in zip(expected_chunks, chunks):
        assert ground_truth["word_count"] == paragraph_data.word_count, f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
        assert ground_truth["len_text"] == len(paragraph_data.text), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
        assert ground_truth["cut_type"] == paragraph_data.cut_type, f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
        compared += 1

    # zip() stops silently at the shorter input; without this check an empty
    # or truncated chunk stream would make the whole comparison pass vacuously.
    assert compared == len(expected_chunks), (
        f"expected at least {len(expected_chunks)} chunks, got {compared}"
    )


if __name__ == "__main__":
    # The sample data lives in "test_data" two directory levels above the
    # directory containing this file. abspath() keeps this correct when the
    # script is started via a relative path and avoids hand-splitting on
    # os.sep, which mangles Windows drive prefixes.
    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    test_file_path = os.path.join(base_dir, "test_data", "artificial-intelligence.pdf")

    pdf_doc = PdfDocument(id=uuid.uuid4(), name="Test document.pdf", raw_data_location=test_file_path)

    verify_chunks(GROUND_TRUTH, pdf_doc.read(chunk_size=1024))

View file

@ -23,7 +23,7 @@ async def pipeline(data_queue):
Task(multiply_by_two),
])
results = [2, 4, 6, 8, 10, 12, 14, 16, 18]
results = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
index = 0
async for result in tasks_run:
print(result)