diff --git a/cognee/modules/data/processing/document_types/__tests__/PdfDocument.test.py b/cognee/modules/data/processing/document_types/__tests__/PdfDocument.test.py deleted file mode 100644 index 57aa1fa5c..000000000 --- a/cognee/modules/data/processing/document_types/__tests__/PdfDocument.test.py +++ /dev/null @@ -1,13 +0,0 @@ -import os -from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument - -if __name__ == "__main__": - test_file_path = os.path.join(os.path.dirname(__file__), "artificial-inteligence.pdf") - pdf_doc = PdfDocument("Test document.pdf", test_file_path, chunking_strategy="paragraph") - reader = pdf_doc.get_reader() - - for paragraph_data in reader.read(): - print(paragraph_data["word_count"]) - print(paragraph_data["text"]) - print(paragraph_data["cut_type"]) - print("\n") diff --git a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v1.pdf b/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v1.pdf deleted file mode 100644 index 7de338b8c..000000000 Binary files a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v1.pdf and /dev/null differ diff --git a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v2.pdf b/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v2.pdf deleted file mode 100644 index 601c6297d..000000000 Binary files a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v2.pdf and /dev/null differ diff --git a/cognee/tests/.DS_Store b/cognee/tests/.DS_Store new file mode 100644 index 000000000..c3f46f9a8 Binary files /dev/null and b/cognee/tests/.DS_Store differ diff --git a/cognee/tests/unit/documents/pdf_document.py b/cognee/tests/unit/documents/pdf_document.py new file mode 100644 index 000000000..5756eca30 --- /dev/null +++ b/cognee/tests/unit/documents/pdf_document.py @@ -0,0 +1,20 @@ +import os +import uuid +from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument + +GROUND_TRUTH = [ + {"word_count": 879, "len_text": 5622, "cut_type": "sentence_end"}, + {"word_count": 951, "len_text": 6384, "cut_type": "sentence_end"}, +] + + +if __name__ == "__main__": + test_file_path = os.path.join(os.sep, *(os.path.dirname(__file__).split(os.sep)[:-2]),"test_data", "artificial-intelligence.pdf") + pdf_doc = PdfDocument(id = uuid.uuid4(), name = "Test document.pdf", raw_data_location = test_file_path) + + for ground_truth, paragraph_data in zip(GROUND_TRUTH, pdf_doc.read(chunk_size = 1024)): + assert ground_truth["word_count"] == paragraph_data.word_count, f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + assert ground_truth["len_text"] == len(paragraph_data.text), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + assert ground_truth["cut_type"] == paragraph_data.cut_type, f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + + diff --git a/cognee/modules/pipelines/operations/__tests__/run_tasks.test.py b/cognee/tests/unit/run_tasks/run_tasks.py similarity index 100% rename from cognee/modules/pipelines/operations/__tests__/run_tasks.test.py rename to cognee/tests/unit/run_tasks/run_tasks.py diff --git a/cognee/modules/pipelines/operations/__tests__/run_tasks_from_queue.test.py b/cognee/tests/unit/run_tasks/run_tasks_from_queue.py similarity index 95% rename from cognee/modules/pipelines/operations/__tests__/run_tasks_from_queue.test.py rename to cognee/tests/unit/run_tasks/run_tasks_from_queue.py index 387d22ce6..bf9fbb8f5 100644 --- a/cognee/modules/pipelines/operations/__tests__/run_tasks_from_queue.test.py +++ b/cognee/tests/unit/run_tasks/run_tasks_from_queue.py @@ -23,7 +23,7 @@ async def pipeline(data_queue): Task(multiply_by_two), ]) - results = [2, 4, 6, 8, 10, 12, 14, 16, 18] + results = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20] index = 0 async for result in tasks_run: print(result)