Add first three unit tests
This commit is contained in:
parent
cdaf63f57c
commit
dce894bfd3
7 changed files with 21 additions and 14 deletions
|
|
@@ -1,13 +0,0 @@
|
|||
import os
|
||||
from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_file_path = os.path.join(os.path.dirname(__file__), "artificial-inteligence.pdf")
|
||||
pdf_doc = PdfDocument("Test document.pdf", test_file_path, chunking_strategy="paragraph")
|
||||
reader = pdf_doc.get_reader()
|
||||
|
||||
for paragraph_data in reader.read():
|
||||
print(paragraph_data["word_count"])
|
||||
print(paragraph_data["text"])
|
||||
print(paragraph_data["cut_type"])
|
||||
print("\n")
|
||||
Binary file not shown.
Binary file not shown.
BIN
cognee/tests/.DS_Store
vendored
Normal file
BIN
cognee/tests/.DS_Store
vendored
Normal file
Binary file not shown.
20
cognee/tests/unit/documents/pdf_document.py
Normal file
20
cognee/tests/unit/documents/pdf_document.py
Normal file
|
|
@@ -0,0 +1,20 @@
|
|||
import os
|
||||
import uuid
|
||||
from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument
|
||||
|
||||
GROUND_TRUTH = [
|
||||
{"word_count": 879, "len_text": 5622, "cut_type": "sentence_end"},
|
||||
{"word_count": 951, "len_text": 6384, "cut_type": "sentence_end"},
|
||||
]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_file_path = os.path.join(os.sep, *(os.path.dirname(__file__).split(os.sep)[:-2]),"test_data", "artificial-intelligence.pdf")
|
||||
pdf_doc = PdfDocument(id = uuid.uuid4(), name = "Test document.pdf", raw_data_location = test_file_path)
|
||||
|
||||
for ground_truth, paragraph_data in zip(GROUND_TRUTH, pdf_doc.read(chunk_size = 1024)):
|
||||
assert ground_truth["word_count"] == paragraph_data.word_count, f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
|
||||
assert ground_truth["len_text"] == len(paragraph_data.text), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
|
||||
assert ground_truth["cut_type"] == paragraph_data.cut_type, f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
|
||||
|
||||
|
||||
|
|
@@ -23,7 +23,7 @@ async def pipeline(data_queue):
|
|||
Task(multiply_by_two),
|
||||
])
|
||||
|
||||
results = [2, 4, 6, 8, 10, 12, 14, 16, 18]
|
||||
results = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
|
||||
index = 0
|
||||
async for result in tasks_run:
|
||||
print(result)
|
||||
Loading…
Add table
Reference in a new issue