Test: test descriptive graph metric calculation in neo4j and networkx adapters [COG-1188] (#500)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Refactor**
  - Updated the default processing flow by removing a descriptive metrics task.
- **New Features**
  - Introduced asynchronous graph management capabilities including checks, projection, and deletion.
  - Enhanced graph metrics extraction with additional analytics.
- **Chores**
  - Improved timestamp handling using database-driven defaults.
- **Tests**
  - Added tests to verify graph metrics consistency and accuracy.
  - Integrated a new CI workflow for automated testing of graph metrics.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Co-authored-by: Boris <boris@topoteretes.com>
This commit is contained in:
parent
de67a44918
commit
460691b76a
5 changed files with 161 additions and 0 deletions
28
.github/workflows/test_descriptive_graph_metrics.yml
vendored
Normal file
28
.github/workflows/test_descriptive_graph_metrics.yml
vendored
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# CI workflow that runs the NetworkX descriptive-graph-metrics test script
# through the shared reusable Python-example workflow.
name: test | descriptive graph metrics

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  # Run for pull requests when a label is added or new commits are pushed.
  pull_request:
    types:
      - labeled
      - synchronize

# One run per workflow and pull request (or ref); a newer run cancels the
# one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  run_networkx_metrics_test:
    uses: ./.github/workflows/reusable_python_example.yml
    with:
      # Script handed to the reusable workflow.
      example-location: ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
    secrets:
      # Repository secrets forwarded to the reusable workflow under the same names.
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
from cognee.tests.tasks.descriptive_metrics.networkx_metrics_test import get_networkx_metrics
|
||||
from cognee.tests.tasks.descriptive_metrics.neo4j_metrics_test import get_neo4j_metrics
|
||||
import asyncio
|
||||
|
||||
|
||||
async def check_graph_metrics_consistency_across_adapters():
    """Check that the Neo4j and NetworkX adapters report identical graph metrics.

    Both metric helpers are awaited with ``include_optional=False`` (Neo4j
    first, then NetworkX) and their results are compared for equality.

    Raises:
        AssertionError: If the two results differ. The message includes both
            results so the mismatch is visible even when this module is run
            as a plain script, where pytest's assertion introspection is not
            available.
    """
    neo4j_metrics = await get_neo4j_metrics(include_optional=False)
    networkx_metrics = await get_networkx_metrics(include_optional=False)

    # The message expression is only evaluated when the comparison fails.
    assert networkx_metrics == neo4j_metrics, (
        "Graph metrics differ between adapters: "
        f"networkx={networkx_metrics!r}, neo4j={neo4j_metrics!r}"
    )
|
||||
|
||||
|
||||
# Script entry point: run the cross-adapter consistency check when this file
# is executed directly.
if __name__ == "__main__":
    asyncio.run(check_graph_metrics_consistency_across_adapters())
|
||||
25
cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py
Normal file
25
cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from cognee.tests.unit.interfaces.graph.get_graph_from_model_test import (
|
||||
Document,
|
||||
DocumentChunk,
|
||||
Entity,
|
||||
EntityType,
|
||||
)
|
||||
from cognee.tasks.storage.add_data_points import add_data_points
|
||||
|
||||
|
||||
async def create_disconnected_test_graph():
    """Build a small two-part test graph and store it via ``add_data_points``.

    First part: a ``DocumentChunk`` belonging to its own ``Document`` that
    contains two entities ("Alice", "Alice2") sharing one ``EntityType``, plus
    a reference to itself.

    Second part: a separate ``DocumentChunk`` with its own ``Document`` that
    contains a single entity ("Bob") with its own ``EntityType`` instance
    (also named "Person").

    Nothing in the first part references anything in the second.
    """
    first_document = Document(path="test/path")
    first_chunk = DocumentChunk(part_of=first_document, text="This is a chunk of text", contains=[])
    person_type = EntityType(name="Person")
    alice = Entity(name="Alice", is_type=person_type)
    alice2 = Entity(name="Alice2", is_type=person_type)
    # The chunk deliberately contains itself: this self-loop exists to
    # exercise the self-loop counting functionality.
    first_chunk.contains.extend([alice, alice2, first_chunk])

    second_document = Document(path="test/path2")
    second_chunk = DocumentChunk(
        part_of=second_document, text="This is a chunk of text", contains=[]
    )
    # A distinct EntityType instance, even though it carries the same name.
    other_person_type = EntityType(name="Person")
    bob = Entity(name="Bob", is_type=other_person_type)
    second_chunk.contains.extend([bob])

    await add_data_points([first_chunk, second_chunk])
|
||||
42
cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
Normal file
42
cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
|
||||
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
import cognee
|
||||
import asyncio
|
||||
import pytest
|
||||
|
||||
|
||||
async def get_neo4j_metrics(include_optional=True):
    """Rebuild the test graph with the "neo4j" provider and return its metrics.

    Clears the ``create_graph_engine`` cache, sets the graph database provider
    to ``"neo4j"``, deletes the existing graph, stores the disconnected test
    graph, and asks the engine for its metrics. The provider setting is not
    restored afterwards.

    Args:
        include_optional: Forwarded unchanged to
            ``graph_engine.get_graph_metrics``.

    Returns:
        Whatever ``get_graph_metrics`` returns for the freshly built graph.
    """
    # NOTE(review): presumably cleared so a previously cached engine for
    # another provider is not reused - confirm against create_graph_engine.
    create_graph_engine.cache_clear()
    cognee.config.set_graph_database_provider("neo4j")

    engine = await get_graph_engine()
    await engine.delete_graph()
    await create_disconnected_test_graph()

    return await engine.get_graph_metrics(include_optional=include_optional)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_neo4j_metrics():
    """Verify the metrics reported for the disconnected test graph in Neo4j.

    Fetches metrics with ``include_optional=True`` and checks node and edge
    counts, mean degree, edge density, connected-component count and sizes,
    and the number of self-loops.
    """
    neo4j_metrics = await get_neo4j_metrics(include_optional=True)

    # Size of the graph.
    assert neo4j_metrics["num_nodes"] == 9, (
        f"Expected 9 nodes, got {neo4j_metrics['num_nodes']}"
    )
    assert neo4j_metrics["num_edges"] == 9, (
        f"Expected 9 edges, got {neo4j_metrics['num_edges']}"
    )

    # Degree and density.
    assert neo4j_metrics["mean_degree"] == 2, (
        f"Expected mean degree is 2, got {neo4j_metrics['mean_degree']}"
    )
    assert neo4j_metrics["edge_density"] == 0.125, (
        f"Expected edge density is 0.125, got {neo4j_metrics['edge_density']}"
    )

    # Connectivity.
    assert neo4j_metrics["num_connected_components"] == 2, (
        f"Expected 2 connected components, got {neo4j_metrics['num_connected_components']}"
    )
    assert neo4j_metrics["sizes_of_connected_components"] == [5, 4], (
        f"Expected connected components of size [5, 4], got {neo4j_metrics['sizes_of_connected_components']}"
    )

    # Self-loops.
    assert neo4j_metrics["num_selfloops"] == 1, (
        f"Expected 1 self-loop, got {neo4j_metrics['num_selfloops']}"
    )
|
||||
|
||||
|
||||
# Script entry point: run the Neo4j metrics test directly, outside pytest.
if __name__ == "__main__":
    asyncio.run(test_neo4j_metrics())
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
|
||||
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
import cognee
|
||||
import asyncio
|
||||
|
||||
|
||||
async def get_networkx_metrics(include_optional=True):
    """Rebuild the test graph with the "networkx" provider and return its metrics.

    Clears the ``create_graph_engine`` cache, sets the graph database provider
    to ``"networkx"``, deletes the existing graph, stores the disconnected
    test graph, and asks the engine for its metrics. The provider setting is
    not restored afterwards.

    Args:
        include_optional: Forwarded unchanged to
            ``graph_engine.get_graph_metrics``.

    Returns:
        Whatever ``get_graph_metrics`` returns for the freshly built graph.
    """
    # NOTE(review): presumably cleared so a previously cached engine for
    # another provider is not reused - confirm against create_graph_engine.
    create_graph_engine.cache_clear()
    cognee.config.set_graph_database_provider("networkx")

    engine = await get_graph_engine()
    await engine.delete_graph()
    await create_disconnected_test_graph()

    return await engine.get_graph_metrics(include_optional=include_optional)
|
||||
|
||||
|
||||
async def assert_networkx_metrics():
    """Verify the metrics reported for the disconnected test graph in NetworkX.

    Fetches metrics with ``include_optional=True`` and checks node and edge
    counts, mean degree, edge density, connected-component count and sizes,
    the number of self-loops, and that ``diameter`` and
    ``avg_shortest_path_length`` are ``None`` while ``avg_clustering`` is 0.

    Raises:
        AssertionError: On the first metric that does not match.
    """
    networkx_metrics = await get_networkx_metrics(include_optional=True)

    # Size of the graph.
    assert networkx_metrics["num_nodes"] == 9, (
        f"Expected 9 nodes, got {networkx_metrics['num_nodes']}"
    )
    assert networkx_metrics["num_edges"] == 9, (
        f"Expected 9 edges, got {networkx_metrics['num_edges']}"
    )

    # Degree and density.
    assert networkx_metrics["mean_degree"] == 2, (
        f"Expected mean degree is 2, got {networkx_metrics['mean_degree']}"
    )
    assert networkx_metrics["edge_density"] == 0.125, (
        f"Expected edge density is 0.125, got {networkx_metrics['edge_density']}"
    )

    # Connectivity.
    assert networkx_metrics["num_connected_components"] == 2, (
        f"Expected 2 connected components, got {networkx_metrics['num_connected_components']}"
    )
    assert networkx_metrics["sizes_of_connected_components"] == [5, 4], (
        f"Expected connected components of size [5, 4], got {networkx_metrics['sizes_of_connected_components']}"
    )

    # Self-loops.
    assert networkx_metrics["num_selfloops"] == 1, (
        f"Expected 1 self-loop, got {networkx_metrics['num_selfloops']}"
    )

    # Path-based metrics are expected to be None because the graph is disconnected.
    assert networkx_metrics["diameter"] is None, (
        f"Diameter should be None for disconnected graphs, got {networkx_metrics['diameter']}"
    )
    assert networkx_metrics["avg_shortest_path_length"] is None, (
        f"Average shortest path should be None for disconnected graphs, got {networkx_metrics['avg_shortest_path_length']}"
    )

    # Clustering.
    assert networkx_metrics["avg_clustering"] == 0, (
        f"Expected 0 average clustering, got {networkx_metrics['avg_clustering']}"
    )
|
||||
|
||||
|
||||
# Script entry point: run the NetworkX metric assertions when this file is
# executed directly (this is the script the CI workflow points at).
if __name__ == "__main__":
    asyncio.run(assert_networkx_metrics())
|
||||
Loading…
Add table
Reference in a new issue