feat: Add graph metrics getter in graph db interface and adapters [COG-1082] (#483)

Dummy implementation of graph metrics to demonstrate what the interface
will look like

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Introduced asynchronous functionality for retrieving comprehensive
graph metrics, including counts and connectivity details, across
different systems.
  
- **Refactor**
- Streamlined metrics processing and storage by shifting to direct
retrieval from the graph engine.
- Updated naming conventions for the `GraphMetrics` database table and
reorganized module imports to enhance internal consistency.
  
- **Chores**
- Removed dataset deletion functionalities while introducing the ability
to store descriptive metrics.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
This commit is contained in:
alekszievr 2025-02-03 15:25:04 +01:00 committed by GitHub
parent 44a4f8fd0d
commit 5119992fd8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 55 additions and 18 deletions

View file

@ -25,7 +25,7 @@ from cognee.tasks.documents import (
)
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points
from cognee.tasks.storage.descriptive_metrics import store_descriptive_metrics
from cognee.modules.data.methods import store_descriptive_metrics
from cognee.tasks.storage.index_graph_edges import index_graph_edges
from cognee.tasks.summarization import summarize_text

View file

@ -54,3 +54,7 @@ class GraphDBInterface(Protocol):
# Abstract interface method: this base version only raises NotImplementedError,
# so a concrete graph adapter has to supply its own implementation.
# NOTE(review): the return shape is not defined here — confirm against the adapters.
@abstractmethod
async def get_graph_data(self):
raise NotImplementedError
# Abstract interface method for retrieving graph metrics: this base version only
# raises NotImplementedError, so a concrete graph adapter has to override it.
# NOTE(review): no return type is declared here — presumably a dict of metric
# name to value; confirm against the adapter implementations.
@abstractmethod
async def get_graph_metrics(self):
raise NotImplementedError

View file

@ -530,3 +530,17 @@ class Neo4jAdapter(GraphDBInterface):
]
return (nodes, edges)
async def get_graph_metrics(self):
    """Return placeholder graph metrics.

    Nothing is computed from the graph: every metric is reported as ``-1``.

    Returns:
        A dict mapping each metric name to ``-1``.
    """
    metric_names = (
        "num_nodes",
        "num_edges",
        "mean_degree",
        "edge_density",
        "num_connected_components",
        "sizes_of_connected_components",
        "num_selfloops",
        "diameter",
        "avg_shortest_path_length",
        "avg_clustering",
    )
    # -1 is the stand-in value for every metric in this dummy implementation.
    return dict.fromkeys(metric_names, -1)

View file

@ -385,3 +385,17 @@ class NetworkXAdapter(GraphDBInterface):
]
return filtered_nodes, filtered_edges
async def get_graph_metrics(self):
    """Return placeholder graph metrics.

    Nothing is computed from the graph: every metric is reported as ``-1``.

    Returns:
        A dict mapping each metric name to ``-1``.
    """
    placeholder = -1
    metric_names = (
        "num_nodes",
        "num_edges",
        "mean_degree",
        "edge_density",
        "num_connected_components",
        "sizes_of_connected_components",
        "num_selfloops",
        "diameter",
        "avg_shortest_path_length",
        "avg_clustering",
    )
    return {name: placeholder for name in metric_names}

View file

@ -11,3 +11,5 @@ from .get_data import get_data
# Delete
from .delete_dataset import delete_dataset
from .delete_data import delete_data
from .store_descriptive_metrics import store_descriptive_metrics

View file

@ -1,5 +1,4 @@
from cognee.infrastructure.engine import DataPoint
from cognee.modules.data.processing.document_types import Document
from cognee.infrastructure.databases.relational import get_relational_engine
from sqlalchemy import select
from sqlalchemy.sql import func
@ -24,25 +23,28 @@ async def fetch_token_count(db_engine) -> int:
return token_count_sum
async def calculate_graph_metrics(graph_data):
    """Count the nodes and edges contained in ``graph_data``.

    Args:
        graph_data: A two-item iterable ``(nodes, edges)``; both items must
            support ``len()``.

    Returns:
        A dict with the keys ``"num_nodes"`` and ``"num_edges"``.
    """
    node_items, edge_items = graph_data
    return {"num_nodes": len(node_items), "num_edges": len(edge_items)}
# Stores graph metrics together with a token count in the relational database
# and returns `data_points` unchanged (the argument is not read otherwise).
#
# NOTE(review): as shown, this span contains two storage paths side by side —
# a GraphMetrics row added through a session, and a dict passed to
# db_engine.insert_data — and assigns `graph_metrics` twice. This looks like the
# removed and added lines of a diff rendered together; confirm against the real
# file which lines are live before relying on it.
async def store_descriptive_metrics(data_points: list[DataPoint]):
db_engine = get_relational_engine()
graph_engine = await get_graph_engine()
graph_data = await graph_engine.get_graph_data()
# Metrics dict supplied directly by the graph engine.
graph_metrics = await graph_engine.get_graph_metrics()
token_count_sum = await fetch_token_count(db_engine)
# Overwrites the assignment above; calculate_graph_metrics only returns
# "num_nodes" and "num_edges", so the other key lookups below would fail
# if both assignments were really executed in this order.
graph_metrics = await calculate_graph_metrics(graph_data)
async with db_engine.get_async_session() as session:
# Session-based path: one GraphMetrics row with a fresh UUID and one field per metric key.
metrics = GraphMetrics(
id=uuid.uuid4(),
num_tokens=await fetch_token_count(db_engine),
num_nodes=graph_metrics["num_nodes"],
num_edges=graph_metrics["num_edges"],
mean_degree=graph_metrics["mean_degree"],
edge_density=graph_metrics["edge_density"],
num_connected_components=graph_metrics["num_connected_components"],
sizes_of_connected_components=graph_metrics["sizes_of_connected_components"],
num_selfloops=graph_metrics["num_selfloops"],
diameter=graph_metrics["diameter"],
avg_shortest_path_length=graph_metrics["avg_shortest_path_length"],
avg_clustering=graph_metrics["avg_clustering"],
)
# Dict-based path: id and token count merged with the metrics dict, written by table name.
table_name = "graph_metrics_table"
metrics_dict = {"id": uuid.uuid4(), "num_tokens": token_count_sum} | graph_metrics
session.add(metrics)
await session.commit()
await db_engine.insert_data(table_name, metrics_dict)
# The input is passed through unchanged.
return data_points

View file

@ -7,7 +7,7 @@ from uuid import uuid4
class GraphMetrics(Base):
__tablename__ = "graph_metrics_table"
__tablename__ = "graph_metrics"
# TODO: Change ID to reflect unique id of graph database
id = Column(UUID, primary_key=True, default=uuid4)

View file

@ -1,3 +1,4 @@
from .Data import Data
from .Dataset import Dataset
from .DatasetData import DatasetData
from .GraphMetrics import GraphMetrics