diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index 4e50cd325..4a4648579 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -273,3 +273,32 @@ jobs:
           EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./cognee/tests/test_permissions.py
+
+  test-graph-edges:
+    name: Test graph edge ingestion
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Install dependencies
+        run: |
+          poetry install
+
+      - name: Run graph edges test
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+        run: poetry run python ./cognee/tests/test_edge_ingestion.py
diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
index c1f55d4fc..125f59e72 100644
--- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
+++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
@@ -351,7 +351,7 @@ def expand_with_nodes_and_edges(
         _process_graph_edges(graph, name_mapping, existing_edges_map, relationships)
 
     # Return combined results
-    graph_nodes = list(added_ontology_nodes_map.values())
+    graph_nodes = [*data_chunks, *added_ontology_nodes_map.values()]
     graph_edges = relationships + ontology_relationships
 
     return graph_nodes, graph_edges
diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py
index 7e675229d..c60ad350c 100644
--- a/cognee/tasks/graph/extract_graph_from_data.py
+++ b/cognee/tasks/graph/extract_graph_from_data.py
@@ -11,6 +11,7 @@ from cognee.modules.graph.utils import (
     retrieve_existing_edges,
 )
 from cognee.shared.data_models import KnowledgeGraph
+from cognee.tasks.storage.add_data_points import add_data_points
 
 
 async def integrate_chunk_graphs(
@@ -38,8 +39,7 @@ async def integrate_chunk_graphs(
     )
 
     if len(graph_nodes) > 0:
-        await graph_engine.add_nodes(graph_nodes)
-
+        await add_data_points(graph_nodes)
     if len(graph_edges) > 0:
         await graph_engine.add_edges(graph_edges)
 
diff --git a/cognee/tests/test_edge_ingestion.py b/cognee/tests/test_edge_ingestion.py
new file mode 100755
index 000000000..5b23f7819
--- /dev/null
+++ b/cognee/tests/test_edge_ingestion.py
@@ -0,0 +1,93 @@
+import asyncio
+import os
+import pathlib
+from collections import Counter
+
+import cognee
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.modules.users.methods import get_default_user
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
+
+
+async def test_edge_ingestion():
+    """
+    Check that cognify stores both structural and entity-to-entity edges.
+
+    Adds three short sentences to the "edge_ingestion_test" dataset, runs
+    cognify on it, and asserts on the edge-type counts read back from the graph engine.
+    """
+
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_edge_ingestion")
+        ).resolve()
+    )
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_edge_ingestion")
+        ).resolve()
+    )
+
+    cognee.config.data_root_directory(data_directory_path)
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    # Prune data and system metadata first (presumably clears state left by earlier runs).
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    basic_nested_edges = ["is_a", "is_part_of", "contains", "made_from"]
+
+    entity_to_entity_edges = ["likes", "prefers", "watches"]
+
+    text1 = "Dave watches Dexter Resurrection"
+    text2 = "Ana likes apples"
+    text3 = "Bob prefers Cognee over other solutions"
+
+    await cognee.add([text1, text2, text3], dataset_name="edge_ingestion_test")
+
+    user = await get_default_user()
+
+    await cognee.cognify(["edge_ingestion_test"], user=user)
+
+    graph_engine = await get_graph_engine()
+    graph = await graph_engine.get_graph_data()
+
+    # graph[1] is treated as the edge list and element 2 of each edge as its type name.
+    edge_type_counts = Counter(edge[2] for edge in graph[1])
+
+    # Tests the presence of basic nested edges
+    for basic_nested_edge in basic_nested_edges:
+        assert edge_type_counts.get(basic_nested_edge, 0) >= 1, (
+            f"Expected at least one {basic_nested_edge} edge, but found {edge_type_counts.get(basic_nested_edge, 0)}"
+        )
+
+    # Tests the presence of additional entity to entity edges
+    assert len(edge_type_counts) > 4, (
+        f"Expected at least 5 edge types (4 structural plus entity to entity edges), but found only {len(edge_type_counts)}"
+    )
+
+    # Tests the consistency of basic nested edges
+    assert edge_type_counts.get("made_from", 0) == edge_type_counts.get("is_part_of", 0), (
+        f"Number of made_from and is_part_of edges are not matching, found {edge_type_counts.get('made_from', 0)} made from and {edge_type_counts.get('is_part_of', 0)} is_part_of."
+    )
+
+    # Tests whether we generate is_a for every entity that is contained by a chunk
+    assert edge_type_counts.get("contains", 0) == edge_type_counts.get("is_a", 0), (
+        f"Number of contains and is_a edges are not matching, found {edge_type_counts.get('is_a', 0)} is_a and {edge_type_counts.get('contains', 0)} contains."
+    )
+
+    found_edges = sum(
+        entity_to_entity_edge in edge_type_counts
+        for entity_to_entity_edge in entity_to_entity_edges
+    )
+
+    # Tests the presence of expected entity to entity edges
+    assert found_edges >= 2, (
+        f"Expected at least 2 entity to entity edges, but found only {found_edges}"
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(test_edge_ingestion())