feat: Add graph metrics getter in graph db interface and adapters [COG-1082] (#483)
Dummy implementation of graph metrics to demonstrate what the interface will look like <!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Introduced asynchronous functionality for retrieving comprehensive graph metrics, including counts and connectivity details, across different systems. - **Refactor** - Streamlined metrics processing and storage by shifting to direct retrieval from the graph engine. - Updated naming conventions for the `GraphMetrics` database table and reorganized module imports to enhance internal consistency. - **Chores** - Removed dataset deletion functionalities while introducing the ability to store descriptive metrics. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
This commit is contained in:
parent
44a4f8fd0d
commit
5119992fd8
8 changed files with 55 additions and 18 deletions
|
|
@ -25,7 +25,7 @@ from cognee.tasks.documents import (
|
|||
)
|
||||
from cognee.tasks.graph import extract_graph_from_data
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.tasks.storage.descriptive_metrics import store_descriptive_metrics
|
||||
from cognee.modules.data.methods import store_descriptive_metrics
|
||||
from cognee.tasks.storage.index_graph_edges import index_graph_edges
|
||||
from cognee.tasks.summarization import summarize_text
|
||||
|
||||
|
|
|
|||
|
|
@ -54,3 +54,7 @@ class GraphDBInterface(Protocol):
|
|||
@abstractmethod
async def get_graph_data(self):
    """Return the full graph content from the underlying graph store.

    Abstract: concrete adapters (e.g. Neo4j, NetworkX) must override this.
    Based on the adapter code in this diff, implementations return a
    ``(nodes, edges)`` tuple — TODO confirm the exact element shapes.

    Raises:
        NotImplementedError: always, on this abstract declaration.
    """
    raise NotImplementedError
|
||||
|
||||
@abstractmethod
async def get_graph_metrics(self):
    """Return descriptive metrics about the stored graph.

    Abstract: concrete adapters must override this. Implementations in
    this diff return a dict with keys such as ``num_nodes``, ``num_edges``,
    ``mean_degree``, ``edge_density``, ``num_connected_components``,
    ``sizes_of_connected_components``, ``num_selfloops``, ``diameter``,
    ``avg_shortest_path_length`` and ``avg_clustering``.

    Raises:
        NotImplementedError: always, on this abstract declaration.
    """
    raise NotImplementedError
|
||||
|
|
|
|||
|
|
@ -530,3 +530,17 @@ class Neo4jAdapter(GraphDBInterface):
|
|||
]
|
||||
|
||||
return (nodes, edges)
|
||||
|
||||
async def get_graph_metrics(self):
    """Return descriptive graph metrics for the Neo4j-backed graph.

    Placeholder implementation: every metric is reported as -1 until the
    real Neo4j-side computation is implemented. The key set matches the
    ``GraphMetrics`` relational model used by ``store_descriptive_metrics``.
    """
    # All metrics share the same sentinel value; build the dict from the
    # canonical list of metric names instead of spelling each pair out.
    metric_names = (
        "num_nodes",
        "num_edges",
        "mean_degree",
        "edge_density",
        "num_connected_components",
        "sizes_of_connected_components",
        "num_selfloops",
        "diameter",
        "avg_shortest_path_length",
        "avg_clustering",
    )
    return {name: -1 for name in metric_names}
|
||||
|
|
|
|||
|
|
@ -385,3 +385,17 @@ class NetworkXAdapter(GraphDBInterface):
|
|||
]
|
||||
|
||||
return filtered_nodes, filtered_edges
|
||||
|
||||
async def get_graph_metrics(self):
    """Return descriptive graph metrics for the NetworkX-backed graph.

    Placeholder implementation: each metric is the sentinel -1 until the
    actual NetworkX computation is wired in. Keys mirror the columns of
    the ``GraphMetrics`` relational model.
    """
    # dict.fromkeys gives every metric the shared placeholder value.
    return dict.fromkeys(
        [
            "num_nodes",
            "num_edges",
            "mean_degree",
            "edge_density",
            "num_connected_components",
            "sizes_of_connected_components",
            "num_selfloops",
            "diameter",
            "avg_shortest_path_length",
            "avg_clustering",
        ],
        -1,
    )
|
||||
|
|
|
|||
|
|
@ -11,3 +11,5 @@ from .get_data import get_data
|
|||
# Delete
|
||||
from .delete_dataset import delete_dataset
|
||||
from .delete_data import delete_data
|
||||
|
||||
from .store_descriptive_metrics import store_descriptive_metrics
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from cognee.infrastructure.engine import DataPoint
|
||||
from cognee.modules.data.processing.document_types import Document
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.sql import func
|
||||
|
|
@ -24,25 +23,28 @@ async def fetch_token_count(db_engine) -> int:
|
|||
return token_count_sum
|
||||
|
||||
|
||||
async def calculate_graph_metrics(graph_data):
    """Compute basic size metrics from raw graph data.

    Args:
        graph_data: a ``(nodes, edges)`` pair of sized collections, as
            returned by the graph engine's ``get_graph_data``.

    Returns:
        dict with ``num_nodes`` and ``num_edges`` counts.
    """
    node_list, edge_list = graph_data
    return {
        "num_nodes": len(node_list),
        "num_edges": len(edge_list),
    }
|
||||
|
||||
|
||||
async def store_descriptive_metrics(data_points: list[DataPoint]):
|
||||
db_engine = get_relational_engine()
|
||||
graph_engine = await get_graph_engine()
|
||||
graph_data = await graph_engine.get_graph_data()
|
||||
graph_metrics = await graph_engine.get_graph_metrics()
|
||||
|
||||
token_count_sum = await fetch_token_count(db_engine)
|
||||
graph_metrics = await calculate_graph_metrics(graph_data)
|
||||
async with db_engine.get_async_session() as session:
|
||||
metrics = GraphMetrics(
|
||||
id=uuid.uuid4(),
|
||||
num_tokens=await fetch_token_count(db_engine),
|
||||
num_nodes=graph_metrics["num_nodes"],
|
||||
num_edges=graph_metrics["num_edges"],
|
||||
mean_degree=graph_metrics["mean_degree"],
|
||||
edge_density=graph_metrics["edge_density"],
|
||||
num_connected_components=graph_metrics["num_connected_components"],
|
||||
sizes_of_connected_components=graph_metrics["sizes_of_connected_components"],
|
||||
num_selfloops=graph_metrics["num_selfloops"],
|
||||
diameter=graph_metrics["diameter"],
|
||||
avg_shortest_path_length=graph_metrics["avg_shortest_path_length"],
|
||||
avg_clustering=graph_metrics["avg_clustering"],
|
||||
)
|
||||
|
||||
table_name = "graph_metrics_table"
|
||||
metrics_dict = {"id": uuid.uuid4(), "num_tokens": token_count_sum} | graph_metrics
|
||||
session.add(metrics)
|
||||
await session.commit()
|
||||
|
||||
await db_engine.insert_data(table_name, metrics_dict)
|
||||
return data_points
|
||||
|
|
@ -7,7 +7,7 @@ from uuid import uuid4
|
|||
|
||||
|
||||
class GraphMetrics(Base):
|
||||
__tablename__ = "graph_metrics_table"
|
||||
__tablename__ = "graph_metrics"
|
||||
|
||||
# TODO: Change ID to reflect unique id of graph database
|
||||
id = Column(UUID, primary_key=True, default=uuid4)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from .Data import Data
|
||||
from .Dataset import Dataset
|
||||
from .DatasetData import DatasetData
|
||||
from .GraphMetrics import GraphMetrics
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue