diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 000000000..c53aac86a Binary files /dev/null and b/.DS_Store differ diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml deleted file mode 100644 index f38948f94..000000000 --- a/.github/workflows/auto-comment.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: Issue and PR Auto Comments -on: - issues: - types: - - opened - - closed - - assigned - pull_request_target: - types: - - opened - - closed - -permissions: - contents: read - -jobs: - auto-comment: - permissions: - issues: write - pull-requests: write - runs-on: ubuntu-latest - steps: - # configuration for auto-comment actions - - name: Configure Auto Comments - uses: wow-actions/auto-comment@v1 - with: - GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} - issuesOpened: | - 👋 @{{ author }} - - Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible. - - To help us address your issue efficiently, please ensure you have provided: - - A clear description of the problem - - Steps to reproduce (if applicable) - - Expected vs actual behavior - - Any relevant screenshots or error messages - - Our team typically responds within 2-3 business days. - - issuesClosed: | - ✅ @{{ author }} - - This issue has been closed. If you have any further questions or if the issue resurfaces, - please feel free to: - - Add a comment to this thread - - Open a new issue with reference to this one - - Thank you for helping us improve! - - pullRequestOpened: | - 👍 @{{ author }} - - Thank you for your pull request and contributing to our community! - - Please ensure you have: - - [ ] Followed our contributing guidelines - - [ ] Added/updated tests (if applicable) - - [ ] Updated documentation (if applicable) - - [ ] Added a descriptive PR title - - Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance. - - # Separate action for merged PRs - - name: Handle Merged Pull Requests - if: github.event.pull_request.merged == true - uses: actions-cool/pr-welcome@v1.4.0 - with: - token: ${{ secrets.GH_TOKEN }} - comment: | - 🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉 - - Your pull request has been merged successfully. Thank you for your valuable contribution! - - We appreciate the time and effort you've put into improving our project. - Your changes will be included in our next release. - - Keep up the great work! 💪 - emoji: 'rocket' - pr-emoji: '+1, heart, rocket' - diff --git a/.github/workflows/test_python_3_10.yml b/.github/workflows/test_python_3_10.yml index 5a7954033..7f762d778 100644 --- a/.github/workflows/test_python_3_10.yml +++ b/.github/workflows/test_python_3_10.yml @@ -50,8 +50,11 @@ jobs: - name: Install dependencies run: poetry install --no-interaction - - name: Run tests - run: poetry run pytest tests/ + - name: Run unit tests + run: poetry run pytest cognee/tests/unit/ + + - name: Run integration tests + run: poetry run pytest cognee/tests/integration/ - name: Run default basic pipeline env: diff --git a/.github/workflows/test_python_3_11.yml b/.github/workflows/test_python_3_11.yml index 22cdad320..b05d901dc 100644 --- a/.github/workflows/test_python_3_11.yml +++ b/.github/workflows/test_python_3_11.yml @@ -50,8 +50,11 @@ jobs: - name: Install dependencies run: poetry install --no-interaction - - name: Run tests - run: poetry run pytest tests/ + - name: Run unit tests + run: poetry run pytest cognee/tests/unit/ + + - name: Run integration tests + run: poetry run pytest cognee/tests/integration/ - name: Run default basic pipeline env: diff --git a/.github/workflows/test_python_3_9.yml b/.github/workflows/test_python_3_9.yml index d6e7f8b97..47c5ddc41 100644 --- a/.github/workflows/test_python_3_9.yml +++ b/.github/workflows/test_python_3_9.yml @@ -50,8 +50,11 @@ jobs: - name: Install dependencies run: poetry install --no-interaction - - name: Run tests - run: poetry run pytest tests/ + - name: Run unit tests + run: poetry run pytest cognee/tests/unit/ + + - name: Run integration tests + run: poetry run pytest cognee/tests/integration/ - name: Run default basic pipeline env: diff --git a/cognee/.DS_Store b/cognee/.DS_Store new file mode 100644 index 000000000..51227d71a Binary files /dev/null and b/cognee/.DS_Store differ diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py index 65aeea289..a72376082 100644 --- a/cognee/infrastructure/databases/graph/networkx/adapter.py +++ b/cognee/infrastructure/databases/graph/networkx/adapter.py @@ -284,16 +284,10 @@ class NetworkXAdapter(GraphDBInterface): os.makedirs(file_dir, exist_ok = True) await self.save_graph_to_file(file_path) - except Exception as e: - logger.error("Failed to load graph from file: %s \n %s", file_path, str(e)) - # Initialize an empty graph in case of error - self.graph = nx.MultiDiGraph() - file_dir = os.path.dirname(file_path) - if not os.path.exists(file_dir): - os.makedirs(file_dir, exist_ok = True) + except Exception: + logger.error("Failed to load graph from file: %s", file_path) - await self.save_graph_to_file(file_path) async def delete_graph(self, file_path: str = None): """Asynchronously delete the graph file from the filesystem.""" diff --git a/cognee/infrastructure/engine/__tests__/model_to_graph_to_model.test.py b/cognee/infrastructure/engine/__tests__/model_to_graph_to_model.test.py deleted file mode 100644 index 5d3908fac..000000000 --- a/cognee/infrastructure/engine/__tests__/model_to_graph_to_model.test.py +++ /dev/null @@ -1,72 +0,0 @@ -from enum import Enum -from typing import Optional -from cognee.infrastructure.engine import DataPoint -from cognee.modules.graph.utils import get_graph_from_model, get_model_instance_from_graph - - -if __name__ == "__main__": - - class CarTypeName(Enum): - Pickup = "Pickup" - Sedan = "Sedan" - SUV = "SUV" - Coupe = "Coupe" - Convertible = "Convertible" - Hatchback = "Hatchback" - Wagon = "Wagon" - Minivan = "Minivan" - Van = "Van" - - class CarType(DataPoint): - id: str - name: CarTypeName - _metadata: dict = dict(index_fields = ["name"]) - - class Car(DataPoint): - id: str - brand: str - model: str - year: int - color: str - is_type: CarType - - class Person(DataPoint): - id: str - name: str - age: int - owns_car: list[Car] - driving_licence: Optional[dict] - _metadata: dict = dict(index_fields = ["name"]) - - boris = Person( - id = "boris", - name = "Boris", - age = 30, - owns_car = [ - Car( - id = "car1", - brand = "Toyota", - model = "Camry", - year = 2020, - color = "Blue", - is_type = CarType(id = "sedan", name = CarTypeName.Sedan), - ), - ], - driving_licence = { - "issued_by": "PU Vrsac", - "issued_on": "2025-11-06", - "number": "1234567890", - "expires_on": "2025-11-06", - }, - ) - - nodes, edges = get_graph_from_model(boris) - - print(nodes) - print(edges) - - person_data = nodes[len(nodes) - 1] - - parsed_person = get_model_instance_from_graph(nodes, edges, 'boris') - - print(parsed_person) diff --git a/cognee/modules/data/processing/document_types/__tests__/PdfDocument.test.py b/cognee/modules/data/processing/document_types/__tests__/PdfDocument.test.py deleted file mode 100644 index 57aa1fa5c..000000000 --- a/cognee/modules/data/processing/document_types/__tests__/PdfDocument.test.py +++ /dev/null @@ -1,13 +0,0 @@ -import os -from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument - -if __name__ == "__main__": - test_file_path = os.path.join(os.path.dirname(__file__), "artificial-inteligence.pdf") - pdf_doc = PdfDocument("Test document.pdf", test_file_path, chunking_strategy="paragraph") - reader = pdf_doc.get_reader() - - for paragraph_data in reader.read(): - print(paragraph_data["word_count"]) - print(paragraph_data["text"]) - print(paragraph_data["cut_type"]) - print("\n") diff --git a/cognee/modules/data/processing/document_types/__tests__/artificial-inteligence.pdf b/cognee/modules/data/processing/document_types/__tests__/artificial-inteligence.pdf deleted file mode 100644 index 7de338b8c..000000000 Binary files a/cognee/modules/data/processing/document_types/__tests__/artificial-inteligence.pdf and /dev/null differ diff --git a/cognee/modules/data/processing/document_types/__tests__/soldiers-home.pdf b/cognee/modules/data/processing/document_types/__tests__/soldiers-home.pdf deleted file mode 100644 index e453ca4bc..000000000 Binary files a/cognee/modules/data/processing/document_types/__tests__/soldiers-home.pdf and /dev/null differ diff --git a/cognee/modules/pipelines/operations/__tests__/__init__.py b/cognee/modules/pipelines/operations/__tests__/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v1.pdf b/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v1.pdf deleted file mode 100644 index 7de338b8c..000000000 Binary files a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v1.pdf and /dev/null differ diff --git a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v2.pdf b/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v2.pdf deleted file mode 100644 index 601c6297d..000000000 Binary files a/cognee/modules/pipelines/operations/__tests__/artificial-inteligence.v2.pdf and /dev/null differ diff --git a/cognee/modules/pipelines/operations/__tests__/get_graph_url.py b/cognee/modules/pipelines/operations/__tests__/get_graph_url.py deleted file mode 100644 index 7a954c8c0..000000000 --- a/cognee/modules/pipelines/operations/__tests__/get_graph_url.py +++ /dev/null @@ -1,14 +0,0 @@ -import asyncio -from cognee.shared.utils import render_graph -from cognee.infrastructure.databases.graph import get_graph_engine - -if __name__ == "__main__": - async def main(): - graph_client = await get_graph_engine() - graph = graph_client.graph - - graph_url = await render_graph(graph) - - print(graph_url) - - asyncio.run(main()) diff --git a/cognee/tasks/chunks/__tests__/chunk_by_paragraph.test.py b/cognee/tasks/chunks/__tests__/chunk_by_paragraph.test.py deleted file mode 100644 index b63be0eb7..000000000 --- a/cognee/tasks/chunks/__tests__/chunk_by_paragraph.test.py +++ /dev/null @@ -1,53 +0,0 @@ -from cognee.tasks.chunks import chunk_by_paragraph - -if __name__ == "__main__": - def test_chunking_on_whole_text(): - test_text = """This is example text. It contains multiple sentences. - This is a second paragraph. First two paragraphs are whole. - Third paragraph is a bit longer and is finished with a dot.""" - - chunks = [] - - for chunk_data in chunk_by_paragraph(test_text, 12, batch_paragraphs = False): - chunks.append(chunk_data) - - assert len(chunks) == 3 - - assert chunks[0]["text"] == "This is example text. It contains multiple sentences." - assert chunks[0]["word_count"] == 8 - assert chunks[0]["cut_type"] == "paragraph_end" - - assert chunks[1]["text"] == "This is a second paragraph. First two paragraphs are whole." - assert chunks[1]["word_count"] == 10 - assert chunks[1]["cut_type"] == "paragraph_end" - - assert chunks[2]["text"] == "Third paragraph is a bit longer and is finished with a dot." - assert chunks[2]["word_count"] == 12 - assert chunks[2]["cut_type"] == "sentence_end" - - def test_chunking_on_cut_text(): - test_text = """This is example text. It contains multiple sentences. - This is a second paragraph. First two paragraphs are whole. - Third paragraph is cut and is missing the dot at the end""" - - chunks = [] - - for chunk_data in chunk_by_paragraph(test_text, 12, batch_paragraphs = False): - chunks.append(chunk_data) - - assert len(chunks) == 3 - - assert chunks[0]["text"] == "This is example text. It contains multiple sentences." - assert chunks[0]["word_count"] == 8 - assert chunks[0]["cut_type"] == "paragraph_end" - - assert chunks[1]["text"] == "This is a second paragraph. First two paragraphs are whole." - assert chunks[1]["word_count"] == 10 - assert chunks[1]["cut_type"] == "paragraph_end" - - assert chunks[2]["text"] == "Third paragraph is cut and is missing the dot at the end" - assert chunks[2]["word_count"] == 12 - assert chunks[2]["cut_type"] == "sentence_cut" - - test_chunking_on_whole_text() - test_chunking_on_cut_text() diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 64ed808d6..d881514a2 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -1,13 +1,16 @@ from cognee.modules.data.models import Data from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument +EXTENSION_TO_DOCUMENT_CLASS = { + "pdf": PdfDocument, + "audio": AudioDocument, + "image": ImageDocument, + "txt": TextDocument +} + def classify_documents(data_documents: list[Data]) -> list[Document]: documents = [ - PdfDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else - AudioDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else - ImageDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else - TextDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) + EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location, name=data_item.name) for data_item in data_documents ] - return documents diff --git a/cognee/tasks/graph/infer_data_ontology.py b/cognee/tasks/graph/infer_data_ontology.py index e1a710fa2..eea378eb1 100644 --- a/cognee/tasks/graph/infer_data_ontology.py +++ b/cognee/tasks/graph/infer_data_ontology.py @@ -18,7 +18,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph -from cognee.tasks.infer_data_ontology.models.models import NodeModel, GraphOntology +from cognee.tasks.graph.models import NodeModel, GraphOntology from cognee.shared.data_models import KnowledgeGraph from cognee.modules.engine.utils import generate_node_id, generate_node_name diff --git a/cognee/tasks/infer_data_ontology/models/__pycache__/models.py b/cognee/tasks/graph/models.py similarity index 100% rename from cognee/tasks/infer_data_ontology/models/__pycache__/models.py rename to cognee/tasks/graph/models.py diff --git a/cognee/tasks/search_evaluate/__init__.py b/cognee/tasks/search_evaluate/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cognee/tasks/summarization/models/TextSummary.py b/cognee/tasks/summarization/models.py similarity index 100% rename from cognee/tasks/summarization/models/TextSummary.py rename to cognee/tasks/summarization/models.py diff --git a/cognee/tasks/summarization/summarize_text.py b/cognee/tasks/summarization/summarize_text.py index 47d6946bb..2dab3b0f8 100644 --- a/cognee/tasks/summarization/summarize_text.py +++ b/cognee/tasks/summarization/summarize_text.py @@ -5,7 +5,7 @@ from pydantic import BaseModel from cognee.modules.data.extraction.extract_summary import extract_summary from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.tasks.storage import add_data_points -from .models.TextSummary import TextSummary +from .models import TextSummary async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]): if len(data_chunks) == 0: diff --git a/cognee/tests/integration/run_toy_tasks/conftest.py b/cognee/tests/integration/run_toy_tasks/conftest.py new file mode 100644 index 000000000..09e98328c --- /dev/null +++ b/cognee/tests/integration/run_toy_tasks/conftest.py @@ -0,0 +1,11 @@ +import os + +import pytest + + +@pytest.fixture(autouse=True, scope="session") +def copy_cognee_db_to_target_location(): + os.makedirs("cognee/.cognee_system/databases/", exist_ok=True) + os.system( + "cp cognee/tests/integration/run_toy_tasks/data/cognee_db cognee/.cognee_system/databases/cognee_db" + ) diff --git a/cognee/tests/integration/run_toy_tasks/data/cognee_db b/cognee/tests/integration/run_toy_tasks/data/cognee_db new file mode 100644 index 000000000..60455ad29 Binary files /dev/null and b/cognee/tests/integration/run_toy_tasks/data/cognee_db differ diff --git a/cognee/modules/pipelines/operations/__tests__/run_tasks_from_queue.test.py b/cognee/tests/integration/run_toy_tasks/run_task_from_queue_test.py similarity index 65% rename from cognee/modules/pipelines/operations/__tests__/run_tasks_from_queue.test.py rename to cognee/tests/integration/run_toy_tasks/run_task_from_queue_test.py index 387d22ce6..e57b16f39 100644 --- a/cognee/modules/pipelines/operations/__tests__/run_tasks_from_queue.test.py +++ b/cognee/tests/integration/run_toy_tasks/run_task_from_queue_test.py @@ -1,8 +1,10 @@ import asyncio from queue import Queue + from cognee.modules.pipelines.operations.run_tasks import run_tasks from cognee.modules.pipelines.tasks.Task import Task + async def pipeline(data_queue): async def queue_consumer(): while not data_queue.is_closed: @@ -17,20 +19,25 @@ async def pipeline(data_queue): async def multiply_by_two(num): yield num * 2 - tasks_run = run_tasks([ - Task(queue_consumer), - Task(add_one), - Task(multiply_by_two), - ]) + tasks_run = run_tasks( + [ + Task(queue_consumer), + Task(add_one), + Task(multiply_by_two), + ], + pipeline_name="test_run_tasks_from_queue", + ) - results = [2, 4, 6, 8, 10, 12, 14, 16, 18] + results = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20] index = 0 async for result in tasks_run: - print(result) - assert result == results[index] + assert ( + result == results[index] + ), f"at {index = }: {result = } != {results[index] = }" index += 1 -async def main(): + +async def run_queue(): data_queue = Queue() data_queue.is_closed = False @@ -42,5 +49,6 @@ async def main(): await asyncio.gather(pipeline(data_queue), queue_producer()) -if __name__ == "__main__": - asyncio.run(main()) + +def test_run_tasks_from_queue(): + asyncio.run(run_queue()) diff --git a/cognee/modules/pipelines/operations/__tests__/run_tasks.test.py b/cognee/tests/integration/run_toy_tasks/run_tasks_test.py similarity index 51% rename from cognee/modules/pipelines/operations/__tests__/run_tasks.test.py rename to cognee/tests/integration/run_toy_tasks/run_tasks_test.py index 2fef802fd..d0a2af80b 100644 --- a/cognee/modules/pipelines/operations/__tests__/run_tasks.test.py +++ b/cognee/tests/integration/run_toy_tasks/run_tasks_test.py @@ -1,9 +1,10 @@ import asyncio + from cognee.modules.pipelines.operations.run_tasks import run_tasks from cognee.modules.pipelines.tasks.Task import Task -async def main(): +async def run_and_check_tasks(): def number_generator(num): for i in range(num): yield i + 1 @@ -18,19 +19,25 @@ async def main(): async def add_one_single(num): yield num + 1 - pipeline = run_tasks([ - Task(number_generator), - Task(add_one, task_config = {"batch_size": 5}), - Task(multiply_by_two, task_config = {"batch_size": 1}), - Task(add_one_single), - ], 10) + pipeline = run_tasks( + [ + Task(number_generator), + Task(add_one, task_config={"batch_size": 5}), + Task(multiply_by_two, task_config={"batch_size": 1}), + Task(add_one_single), + ], + 10, + pipeline_name="test_run_tasks", + ) results = [5, 7, 9, 11, 13, 15, 17, 19, 21, 23] index = 0 async for result in pipeline: - print(result) - assert result == results[index] + assert ( + result == results[index] + ), f"at {index = }: {result = } != {results[index] = }" index += 1 -if __name__ == "__main__": - asyncio.run(main()) + +def test_run_tasks(): + asyncio.run(run_and_check_tasks()) diff --git a/cognee/tests/unit/documents/PdfDocument_test.py b/cognee/tests/unit/documents/PdfDocument_test.py new file mode 100644 index 000000000..917e9c3e0 --- /dev/null +++ b/cognee/tests/unit/documents/PdfDocument_test.py @@ -0,0 +1,34 @@ +import os +import uuid + +from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument + +GROUND_TRUTH = [ + {"word_count": 879, "len_text": 5622, "cut_type": "sentence_end"}, + {"word_count": 951, "len_text": 6384, "cut_type": "sentence_end"}, +] + + +def test_PdfDocument(): + test_file_path = os.path.join( + os.sep, + *(os.path.dirname(__file__).split(os.sep)[:-2]), + "test_data", + "artificial-intelligence.pdf", + ) + pdf_doc = PdfDocument( + id=uuid.uuid4(), name="Test document.pdf", raw_data_location=test_file_path + ) + + for ground_truth, paragraph_data in zip( + GROUND_TRUTH, pdf_doc.read(chunk_size=1024) + ): + assert ( + ground_truth["word_count"] == paragraph_data.word_count + ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + assert ground_truth["len_text"] == len( + paragraph_data.text + ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + assert ( + ground_truth["cut_type"] == paragraph_data.cut_type + ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' diff --git a/cognee/tests/unit/interfaces/graph/conftest.py b/cognee/tests/unit/interfaces/graph/conftest.py new file mode 100644 index 000000000..9a784bb53 --- /dev/null +++ b/cognee/tests/unit/interfaces/graph/conftest.py @@ -0,0 +1,80 @@ +from datetime import datetime, timezone +from enum import Enum +from typing import Optional + +import pytest + +from cognee.infrastructure.engine import DataPoint +from cognee.modules.graph.utils import ( + get_graph_from_model, + get_model_instance_from_graph, +) + + +class CarTypeName(Enum): + Pickup = "Pickup" + Sedan = "Sedan" + SUV = "SUV" + Coupe = "Coupe" + Convertible = "Convertible" + Hatchback = "Hatchback" + Wagon = "Wagon" + Minivan = "Minivan" + Van = "Van" + + +class CarType(DataPoint): + id: str + name: CarTypeName + _metadata: dict = dict(index_fields=["name"]) + + +class Car(DataPoint): + id: str + brand: str + model: str + year: int + color: str + is_type: CarType + + +class Person(DataPoint): + id: str + name: str + age: int + owns_car: list[Car] + driving_license: Optional[dict] + _metadata: dict = dict(index_fields=["name"]) + + +@pytest.fixture(scope="session") +def graph_outputs(): + boris = Person( + id="boris", + name="Boris", + age=30, + owns_car=[ + Car( + id="car1", + brand="Toyota", + model="Camry", + year=2020, + color="Blue", + is_type=CarType(id="sedan", name=CarTypeName.Sedan), + ) + ], + driving_license={ + "issued_by": "PU Vrsac", + "issued_on": "2025-11-06", + "number": "1234567890", + "expires_on": "2025-11-06", + }, + ) + nodes, edges = get_graph_from_model(boris) + + car, person = nodes[0], nodes[1] + edge = edges[0] + + parsed_person = get_model_instance_from_graph(nodes, edges, "boris") + + return (car, person, edge, parsed_person) diff --git a/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py b/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py new file mode 100644 index 000000000..17dd69a0e --- /dev/null +++ b/cognee/tests/unit/interfaces/graph/get_graph_from_model_test.py @@ -0,0 +1,54 @@ +from cognee.tests.unit.interfaces.graph.util import run_test_against_ground_truth + +EDGE_GROUND_TRUTH = ( + "boris", + "car1", + "owns_car", + { + "source_node_id": "boris", + "target_node_id": "car1", + "relationship_name": "owns_car", + "metadata": {"type": "list"}, + }, +) + +CAR_GROUND_TRUTH = { + "id": "car1", + "brand": "Toyota", + "model": "Camry", + "year": 2020, + "color": "Blue", +} + +PERSON_GROUND_TRUTH = { + "id": "boris", + "name": "Boris", + "age": 30, + "driving_license": { + "issued_by": "PU Vrsac", + "issued_on": "2025-11-06", + "number": "1234567890", + "expires_on": "2025-11-06", + }, +} + + +def test_extracted_person(graph_outputs): + (_, person, _, _) = graph_outputs + + run_test_against_ground_truth("person", person, PERSON_GROUND_TRUTH) + + +def test_extracted_car(graph_outputs): + (car, _, _, _) = graph_outputs + run_test_against_ground_truth("car", car, CAR_GROUND_TRUTH) + + +def test_extracted_edge(graph_outputs): + (_, _, edge, _) = graph_outputs + + assert ( + EDGE_GROUND_TRUTH[:3] == edge[:3] + ), f"{EDGE_GROUND_TRUTH[:3] = } != {edge[:3] = }" + for key, ground_truth in EDGE_GROUND_TRUTH[3].items(): + assert ground_truth == edge[3][key], f"{ground_truth = } != {edge[3][key] = }" diff --git a/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py b/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py new file mode 100644 index 000000000..98ba501bd --- /dev/null +++ b/cognee/tests/unit/interfaces/graph/get_model_instance_from_graph_test.py @@ -0,0 +1,29 @@ +from cognee.tests.unit.interfaces.graph.util import run_test_against_ground_truth + +PARSED_PERSON_GROUND_TRUTH = { + "id": "boris", + "name": "Boris", + "age": 30, + "driving_license": { + "issued_by": "PU Vrsac", + "issued_on": "2025-11-06", + "number": "1234567890", + "expires_on": "2025-11-06", + }, +} + +CAR_GROUND_TRUTH = { + "id": "car1", + "brand": "Toyota", + "model": "Camry", + "year": 2020, + "color": "Blue", +} + + +def test_parsed_person(graph_outputs): + (_, _, _, parsed_person) = graph_outputs + run_test_against_ground_truth( + "parsed_person", parsed_person, PARSED_PERSON_GROUND_TRUTH + ) + run_test_against_ground_truth("car", parsed_person.owns_car[0], CAR_GROUND_TRUTH) diff --git a/cognee/tests/unit/interfaces/graph/util.py b/cognee/tests/unit/interfaces/graph/util.py new file mode 100644 index 000000000..764eafa11 --- /dev/null +++ b/cognee/tests/unit/interfaces/graph/util.py @@ -0,0 +1,30 @@ +from datetime import datetime, timezone +from typing import Any, Dict + + +def run_test_against_ground_truth( + test_target_item_name: str, test_target_item: Any, ground_truth_dict: Dict[str, Any] +): + """Validates test target item attributes against ground truth values. + + Args: + test_target_item_name: Name of the item being tested (for error messages) + test_target_item: Object whose attributes are being validated + ground_truth_dict: Dictionary containing expected values + + Raises: + AssertionError: If any attribute doesn't match ground truth or if update timestamp is too old + """ + for key, ground_truth in ground_truth_dict.items(): + if isinstance(ground_truth, dict): + for key2, ground_truth2 in ground_truth.items(): + assert ( + ground_truth2 == getattr(test_target_item, key)[key2] + ), f"{test_target_item_name}/{key = }/{key2 = }: {ground_truth2 = } != {getattr(test_target_item, key)[key2] = }" + else: + assert ground_truth == getattr( + test_target_item, key + ), f"{test_target_item_name}/{key = }: {ground_truth = } != {getattr(test_target_item, key) = }" + time_delta = datetime.now(timezone.utc) - getattr(test_target_item, "updated_at") + + assert time_delta.total_seconds() < 60, f"{ time_delta.total_seconds() = }" diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py new file mode 100644 index 000000000..24c3cc147 --- /dev/null +++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py @@ -0,0 +1,69 @@ +from cognee.tasks.chunks import chunk_by_paragraph + +GROUND_TRUTH = { + "whole_text": [ + { + "text": "This is example text. It contains multiple sentences.", + "word_count": 8, + "cut_type": "paragraph_end", + }, + { + "text": "This is a second paragraph. First two paragraphs are whole.", + "word_count": 10, + "cut_type": "paragraph_end", + }, + { + "text": "Third paragraph is a bit longer and is finished with a dot.", + "word_count": 12, + "cut_type": "sentence_end", + }, + ], + "cut_text": [ + { + "text": "This is example text. It contains multiple sentences.", + "word_count": 8, + "cut_type": "paragraph_end", + }, + { + "text": "This is a second paragraph. First two paragraphs are whole.", + "word_count": 10, + "cut_type": "paragraph_end", + }, + { + "text": "Third paragraph is cut and is missing the dot at the end", + "word_count": 12, + "cut_type": "sentence_cut", + }, + ], +} + +INPUT_TEXT = { + "whole_text": """This is example text. It contains multiple sentences. + This is a second paragraph. First two paragraphs are whole. + Third paragraph is a bit longer and is finished with a dot.""", + "cut_text": """This is example text. It contains multiple sentences. + This is a second paragraph. First two paragraphs are whole. + Third paragraph is cut and is missing the dot at the end""", +} + + +def run_chunking_test(test_text, expected_chunks): + chunks = [] + for chunk_data in chunk_by_paragraph(test_text, 12, batch_paragraphs=False): + chunks.append(chunk_data) + + assert len(chunks) == 3 + + for expected_chunks_item, chunk in zip(expected_chunks, chunks): + for key in ["text", "word_count", "cut_type"]: + assert ( + chunk[key] == expected_chunks_item[key] + ), f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }" + + +def test_chunking_whole_text(): + run_chunking_test(INPUT_TEXT["whole_text"], GROUND_TRUTH["whole_text"]) + + +def test_chunking_cut_text(): + run_chunking_test(INPUT_TEXT["cut_text"], GROUND_TRUTH["cut_text"]) diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index f1c52b6fe..000000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -addopts = tests/ \ No newline at end of file