From 928e1075c6c6c5c6d3fba8ac58ed347a81506c00 Mon Sep 17 00:00:00 2001 From: Leon Luithlen Date: Thu, 14 Nov 2024 16:55:24 +0100 Subject: [PATCH] Test chunk_by_paragraph chunk numbering --- .../graph/cognee_graph_elements_test.py | 24 ++++++++++++++++--- .../unit/modules/graph/cognee_graph_test.py | 10 +++++++- .../chunks/chunk_by_paragraph_2_test.py | 14 +++++++++++ 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py index 137b9f7e2..d2a1b6c59 100644 --- a/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +++ b/cognee/tests/unit/modules/graph/cognee_graph_elements_test.py @@ -1,7 +1,7 @@ -import pytest import numpy as np +import pytest -from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge +from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node def test_node_initialization(): @@ -12,11 +12,13 @@ def test_node_initialization(): assert len(node.status) == 2 assert np.all(node.status == 1) + def test_node_invalid_dimension(): """Test that initializing a Node with a non-positive dimension raises an error.""" with pytest.raises(ValueError, match="Dimension must be a positive integer"): Node("node1", dimension=0) + def test_add_skeleton_neighbor(): """Test adding a neighbor to a node.""" node1 = Node("node1") @@ -24,6 +26,7 @@ def test_add_skeleton_neighbor(): node1.add_skeleton_neighbor(node2) assert node2 in node1.skeleton_neighbours + def test_remove_skeleton_neighbor(): """Test removing a neighbor from a node.""" node1 = Node("node1") @@ -32,6 +35,7 @@ def test_remove_skeleton_neighbor(): node1.remove_skeleton_neighbor(node2) assert node2 not in node1.skeleton_neighbours + def test_add_skeleton_edge(): """Test adding an edge updates both skeleton_edges and skeleton_neighbours.""" node1 = Node("node1") @@ -41,6 +45,7 @@ def test_add_skeleton_edge(): assert edge in 
node1.skeleton_edges assert node2 in node1.skeleton_neighbours + def test_remove_skeleton_edge(): """Test removing an edge updates both skeleton_edges and skeleton_neighbours.""" node1 = Node("node1") @@ -51,6 +56,7 @@ def test_remove_skeleton_edge(): assert edge not in node1.skeleton_edges assert node2 not in node1.skeleton_neighbours + def test_is_node_alive_in_dimension(): """Test checking node's alive status in a specific dimension.""" node = Node("node1", dimension=2) @@ -58,25 +64,30 @@ def test_is_node_alive_in_dimension(): node.status[1] = 0 assert not node.is_node_alive_in_dimension(1) + def test_node_alive_invalid_dimension(): """Test that checking alive status with an invalid dimension raises an error.""" node = Node("node1", dimension=1) with pytest.raises(ValueError, match="Dimension 1 is out of range"): node.is_node_alive_in_dimension(1) + def test_node_equality(): """Test equality between nodes.""" node1 = Node("node1") node2 = Node("node1") assert node1 == node2 + def test_node_hash(): """Test hashing for Node.""" node = Node("node1") assert hash(node) == hash("node1") + ### Tests for Edge ### + def test_edge_initialization(): """Test that an Edge is initialized correctly.""" node1 = Node("node1") @@ -89,6 +100,7 @@ def test_edge_initialization(): assert len(edge.status) == 2 assert np.all(edge.status == 1) + def test_edge_invalid_dimension(): """Test that initializing an Edge with a non-positive dimension raises an error.""" node1 = Node("node1") @@ -96,6 +108,7 @@ def test_edge_invalid_dimension(): with pytest.raises(ValueError, match="Dimensions must be a positive integer."): Edge(node1, node2, dimension=0) + def test_is_edge_alive_in_dimension(): """Test checking edge's alive status in a specific dimension.""" node1 = Node("node1") @@ -105,6 +118,7 @@ def test_is_edge_alive_in_dimension(): edge.status[1] = 0 assert not edge.is_edge_alive_in_dimension(1) + def test_edge_alive_invalid_dimension(): """Test that checking alive status with an invalid 
dimension raises an error.""" node1 = Node("node1") @@ -113,6 +127,7 @@ def test_edge_alive_invalid_dimension(): with pytest.raises(ValueError, match="Dimension 1 is out of range"): edge.is_edge_alive_in_dimension(1) + def test_edge_equality_directed(): """Test equality between directed edges.""" node1 = Node("node1") @@ -121,6 +136,7 @@ def test_edge_equality_directed(): edge2 = Edge(node1, node2, directed=True) assert edge1 == edge2 + def test_edge_equality_undirected(): """Test equality between undirected edges.""" node1 = Node("node1") @@ -129,6 +145,7 @@ def test_edge_equality_undirected(): edge2 = Edge(node2, node1, directed=False) assert edge1 == edge2 + def test_edge_hash_directed(): """Test hashing for directed edges.""" node1 = Node("node1") @@ -136,9 +153,10 @@ def test_edge_hash_directed(): edge = Edge(node1, node2, directed=True) assert hash(edge) == hash((node1, node2)) + def test_edge_hash_undirected(): """Test hashing for undirected edges.""" node1 = Node("node1") node2 = Node("node2") edge = Edge(node1, node2, directed=False) - assert hash(edge) == hash(frozenset({node1, node2})) \ No newline at end of file + assert hash(edge) == hash(frozenset({node1, node2})) diff --git a/cognee/tests/unit/modules/graph/cognee_graph_test.py b/cognee/tests/unit/modules/graph/cognee_graph_test.py index 235ccf11d..d05292d75 100644 --- a/cognee/tests/unit/modules/graph/cognee_graph_test.py +++ b/cognee/tests/unit/modules/graph/cognee_graph_test.py @@ -1,7 +1,7 @@ import pytest -from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph +from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node @pytest.fixture @@ -9,6 +9,7 @@ def setup_graph(): """Fixture to initialize a CogneeGraph instance.""" return CogneeGraph() + def test_add_node_success(setup_graph): """Test successful addition of a node.""" graph = setup_graph @@ -16,6 +17,7 @@ def 
test_add_node_success(setup_graph): graph.add_node(node) assert graph.get_node("node1") == node + def test_add_duplicate_node(setup_graph): """Test adding a duplicate node raises an exception.""" graph = setup_graph @@ -24,6 +26,7 @@ def test_add_duplicate_node(setup_graph): with pytest.raises(ValueError, match="Node with id node1 already exists."): graph.add_node(node) + def test_add_edge_success(setup_graph): """Test successful addition of an edge.""" graph = setup_graph @@ -37,6 +40,7 @@ def test_add_edge_success(setup_graph): assert edge in node1.skeleton_edges assert edge in node2.skeleton_edges + def test_add_duplicate_edge(setup_graph): """Test adding a duplicate edge raises an exception.""" graph = setup_graph @@ -49,6 +53,7 @@ def test_add_duplicate_edge(setup_graph): with pytest.raises(ValueError, match="Edge .* already exists in the graph."): graph.add_edge(edge) + def test_get_node_success(setup_graph): """Test retrieving an existing node.""" graph = setup_graph @@ -56,11 +61,13 @@ def test_get_node_success(setup_graph): graph.add_node(node) assert graph.get_node("node1") == node + def test_get_node_nonexistent(setup_graph): """Test retrieving a nonexistent node returns None.""" graph = setup_graph assert graph.get_node("nonexistent") is None + def test_get_edges_success(setup_graph): """Test retrieving edges of a node.""" graph = setup_graph @@ -72,6 +79,7 @@ def test_get_edges_success(setup_graph): graph.add_edge(edge) assert edge in graph.get_edges("node1") + def test_get_edges_nonexistent_node(setup_graph): """Test retrieving edges for a nonexistent node raises an exception.""" graph = setup_graph diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py index ad09c9671..8e900727d 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py @@ -37,3 +37,17 @@ def 
test_paragraph_chunk_length(input_text, paragraph_length, batch_paragraphs): assert np.all( chunk_lengths <= paragraph_length ), f"{paragraph_length = }: {larger_chunks} are too large" + + +@pytest.mark.parametrize( + "input_text,paragraph_length,batch_paragraphs", + list(product(list(INPUT_TEXTS.values()), paragraph_lengths, batch_paragraphs_vals)), +) +def test_chunk_by_paragraph_chunk_numbering( + input_text, paragraph_length, batch_paragraphs +): + chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs) + chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks]) + assert np.all( + chunk_indices == np.arange(len(chunk_indices)) + ), f"{chunk_indices = } are not consecutive integers starting at 0"