Test: test descriptive graph metric calculation in neo4j and networkx adapters [COG-1188] (#500)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **Refactor**
  - Updated the default processing flow by removing a descriptive metrics
    task.
- **New Features**
  - Introduced asynchronous graph management capabilities including
    checks, projection, and deletion.
  - Enhanced graph metrics extraction with additional analytics.
- **Chores**
  - Improved timestamp handling using database-driven defaults.
- **Tests**
  - Added tests to verify graph metrics consistency and accuracy.
  - Integrated a new CI workflow for automated testing of graph metrics.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Co-authored-by: Boris <boris@topoteretes.com>
This commit is contained in:
alekszievr 2025-02-07 17:27:44 +01:00 committed by GitHub
parent de67a44918
commit 460691b76a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 161 additions and 0 deletions

View file

@ -0,0 +1,28 @@
# Runs the NetworkX descriptive graph metrics test script through the
# repository's reusable Python example workflow.
name: test | descriptive graph metrics

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Pull request runs, limited to label additions and new pushes.
  pull_request:
    types: [labeled, synchronize]

# One active run per workflow and pull request (or ref when there is no
# pull request); a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  run_networkx_metrics_test:
    uses: ./.github/workflows/reusable_python_example.yml
    with:
      # Script handed to the reusable workflow.
      example-location: ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
    # Secrets are forwarded explicitly to the called workflow.
    secrets:
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}

View file

@ -0,0 +1,13 @@
from cognee.tests.tasks.descriptive_metrics.networkx_metrics_test import get_networkx_metrics
from cognee.tests.tasks.descriptive_metrics.neo4j_metrics_test import get_neo4j_metrics
import asyncio
async def check_graph_metrics_consistency_across_adapters():
    """Check that the Neo4j and NetworkX adapters report the same graph metrics.

    Both metric helpers are called with ``include_optional=False`` and their
    results are compared for equality.

    Raises:
        AssertionError: If the two results differ. The message contains both
            results so the mismatch is visible even when this module is run
            as a plain script rather than under pytest.
    """
    neo4j_metrics = await get_neo4j_metrics(include_optional=False)
    networkx_metrics = await get_networkx_metrics(include_optional=False)
    assert networkx_metrics == neo4j_metrics, (
        "Graph metrics differ between adapters: "
        f"networkx={networkx_metrics!r}, neo4j={neo4j_metrics!r}"
    )
if __name__ == "__main__":
    # Run the cross-adapter consistency check when this file is executed as a script.
    asyncio.run(check_graph_metrics_consistency_across_adapters())

View file

@ -0,0 +1,25 @@
from cognee.tests.unit.interfaces.graph.get_graph_from_model_test import (
Document,
DocumentChunk,
Entity,
EntityType,
)
from cognee.tasks.storage.add_data_points import add_data_points
async def create_disconnected_test_graph():
    """Build two unrelated document/chunk/entity groups and store them.

    The first group has one document, one chunk and two entities that share an
    entity type; its chunk also lists itself in ``contains``. The second group
    has one document, one chunk and one entity with its own entity type. Both
    chunks are passed to ``add_data_points``.
    """
    # First group.
    first_document = Document(path="test/path")
    first_chunk = DocumentChunk(
        part_of=first_document, text="This is a chunk of text", contains=[]
    )
    first_person_type = EntityType(name="Person")
    alice = Entity(name="Alice", is_type=first_person_type)
    alice2 = Entity(name="Alice2", is_type=first_person_type)
    # The chunk is intentionally placed in its own `contains` list: this
    # self-loop exists to test the self-loop counting functionality.
    first_contained = first_chunk.contains
    first_contained.extend((alice, alice2, first_chunk))

    # Second group, sharing no objects with the first.
    second_document = Document(path="test/path2")
    second_chunk = DocumentChunk(
        part_of=second_document, text="This is a chunk of text", contains=[]
    )
    second_person_type = EntityType(name="Person")
    bob = Entity(name="Bob", is_type=second_person_type)
    second_chunk.contains.append(bob)

    await add_data_points([first_chunk, second_chunk])

View file

@ -0,0 +1,42 @@
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
from cognee.infrastructure.databases.graph import get_graph_engine
import cognee
import asyncio
import pytest
async def get_neo4j_metrics(include_optional=True):
    """Recreate the disconnected test graph with the "neo4j" provider and return its metrics.

    Args:
        include_optional: Forwarded unchanged to the engine's ``get_graph_metrics``.

    Returns:
        Whatever ``get_graph_metrics`` returns for the freshly created graph.
    """
    # Clear the cached engine factory before switching provider
    # (presumably so the provider change below is picked up - confirm).
    create_graph_engine.cache_clear()
    cognee.config.set_graph_database_provider("neo4j")

    engine = await get_graph_engine()
    # Start from an empty graph, then load the fixture.
    await engine.delete_graph()
    await create_disconnected_test_graph()

    return await engine.get_graph_metrics(include_optional=include_optional)
@pytest.mark.asyncio
async def test_neo4j_metrics():
    """Check the metrics reported through ``get_neo4j_metrics`` for the test graph."""
    metrics = await get_neo4j_metrics(include_optional=True)

    # (metric key, expected value, failure message template); checked in this order.
    expectations = (
        ("num_nodes", 9, "Expected 9 nodes, got {}"),
        ("num_edges", 9, "Expected 9 edges, got {}"),
        ("mean_degree", 2, "Expected mean degree is 2, got {}"),
        ("edge_density", 0.125, "Expected edge density is 0.125, got {}"),
        ("num_connected_components", 2, "Expected 2 connected components, got {}"),
        (
            "sizes_of_connected_components",
            [5, 4],
            "Expected connected components of size [5, 4], got {}",
        ),
        ("num_selfloops", 1, "Expected 1 self-loop, got {}"),
    )
    for key, expected, template in expectations:
        assert metrics[key] == expected, template.format(metrics[key])
if __name__ == "__main__":
    # Run the Neo4j metrics test directly when this file is executed as a script.
    asyncio.run(test_neo4j_metrics())

View file

@ -0,0 +1,53 @@
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
from cognee.infrastructure.databases.graph import get_graph_engine
import cognee
import asyncio
async def get_networkx_metrics(include_optional=True):
    """Recreate the disconnected test graph with the "networkx" provider and return its metrics.

    Args:
        include_optional: Forwarded unchanged to the engine's ``get_graph_metrics``.

    Returns:
        Whatever ``get_graph_metrics`` returns for the freshly created graph.
    """
    # Clear the cached engine factory before switching provider
    # (presumably so the provider change below is picked up - confirm).
    create_graph_engine.cache_clear()
    cognee.config.set_graph_database_provider("networkx")

    engine = await get_graph_engine()
    # Start from an empty graph, then load the fixture.
    await engine.delete_graph()
    await create_disconnected_test_graph()

    return await engine.get_graph_metrics(include_optional=include_optional)
async def assert_networkx_metrics():
    """Check the metrics reported through ``get_networkx_metrics`` for the test graph.

    Covers the same metrics as the basic set plus the optional ones
    (``diameter``, ``avg_shortest_path_length``, ``avg_clustering``).
    """
    metrics = await get_networkx_metrics(include_optional=True)

    # (metric key, expected value, failure message template); checked in this order.
    equality_checks = (
        ("num_nodes", 9, "Expected 9 nodes, got {}"),
        ("num_edges", 9, "Expected 9 edges, got {}"),
        ("mean_degree", 2, "Expected mean degree is 2, got {}"),
        ("edge_density", 0.125, "Expected edge density is 0.125, got {}"),
        ("num_connected_components", 2, "Expected 2 connected components, got {}"),
        (
            "sizes_of_connected_components",
            [5, 4],
            "Expected connected components of size [5, 4], got {}",
        ),
        ("num_selfloops", 1, "Expected 1 self-loop, got {}"),
    )
    for key, expected, template in equality_checks:
        assert metrics[key] == expected, template.format(metrics[key])

    # Path-based metrics are expected to be None because the graph is disconnected.
    diameter = metrics["diameter"]
    assert diameter is None, (
        f"Diameter should be None for disconnected graphs, got {diameter}"
    )
    avg_path_length = metrics["avg_shortest_path_length"]
    assert avg_path_length is None, (
        f"Average shortest path should be None for disconnected graphs, got {avg_path_length}"
    )

    avg_clustering = metrics["avg_clustering"]
    assert avg_clustering == 0, f"Expected 0 average clustering, got {avg_clustering}"
if __name__ == "__main__":
    # Run the NetworkX metric assertions directly when this file is executed as a script.
    asyncio.run(assert_networkx_metrics())