From 5119992fd85d69c315603c76d56351643fbbbb17 Mon Sep 17 00:00:00 2001 From: alekszievr <44192193+alekszievr@users.noreply.github.com> Date: Mon, 3 Feb 2025 15:25:04 +0100 Subject: [PATCH] feat: Add graph metrics getter in graph db interface and adapters [COG-1082] (#483) Dummy implementation of graph metrics to demonstrate what the interface will look like. ## Description ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin ## Summary by CodeRabbit - **New Features** - Introduced asynchronous functionality for retrieving comprehensive graph metrics, including counts and connectivity details, across different systems. - **Refactor** - Streamlined metrics processing and storage by shifting to direct retrieval from the graph engine. - Updated naming conventions for the `GraphMetrics` database table and reorganized module imports to enhance internal consistency. - **Chores** - Moved `store_descriptive_metrics` from `cognee.tasks.storage` into `cognee.modules.data.methods` and exported it there; the existing dataset deletion helpers remain exported unchanged. 
--------- Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com> --- cognee/api/v1/cognify/cognify_v2.py | 2 +- .../databases/graph/graph_db_interface.py | 4 +++ .../databases/graph/neo4j_driver/adapter.py | 14 ++++++++ .../databases/graph/networkx/adapter.py | 14 ++++++++ cognee/modules/data/methods/__init__.py | 2 ++ .../methods/store_descriptive_metrics.py} | 34 ++++++++++--------- cognee/modules/data/models/GraphMetrics.py | 2 +- cognee/modules/data/models/__init__.py | 1 + 8 files changed, 55 insertions(+), 18 deletions(-) rename cognee/{tasks/storage/descriptive_metrics.py => modules/data/methods/store_descriptive_metrics.py} (50%) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 978205d2f..c727230d7 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -25,7 +25,7 @@ from cognee.tasks.documents import ( ) from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points -from cognee.tasks.storage.descriptive_metrics import store_descriptive_metrics +from cognee.modules.data.methods import store_descriptive_metrics from cognee.tasks.storage.index_graph_edges import index_graph_edges from cognee.tasks.summarization import summarize_text diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index 30acc1b95..dfb955cd7 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -54,3 +54,7 @@ class GraphDBInterface(Protocol): @abstractmethod async def get_graph_data(self): raise NotImplementedError + + @abstractmethod + async def get_graph_metrics(self): + raise NotImplementedError diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index a5c1f3eb3..4f6f1180c 100644 --- 
a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -530,3 +530,17 @@ class Neo4jAdapter(GraphDBInterface): ] return (nodes, edges) + + async def get_graph_metrics(self): + return { + "num_nodes": -1, + "num_edges": -1, + "mean_degree": -1, + "edge_density": -1, + "num_connected_components": -1, + "sizes_of_connected_components": -1, + "num_selfloops": -1, + "diameter": -1, + "avg_shortest_path_length": -1, + "avg_clustering": -1, + } diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py index ddc1707d3..018799a08 100644 --- a/cognee/infrastructure/databases/graph/networkx/adapter.py +++ b/cognee/infrastructure/databases/graph/networkx/adapter.py @@ -385,3 +385,17 @@ class NetworkXAdapter(GraphDBInterface): ] return filtered_nodes, filtered_edges + + async def get_graph_metrics(self): + return { + "num_nodes": -1, + "num_edges": -1, + "mean_degree": -1, + "edge_density": -1, + "num_connected_components": -1, + "sizes_of_connected_components": -1, + "num_selfloops": -1, + "diameter": -1, + "avg_shortest_path_length": -1, + "avg_clustering": -1, + } diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py index c32db1d2f..57ac00c1a 100644 --- a/cognee/modules/data/methods/__init__.py +++ b/cognee/modules/data/methods/__init__.py @@ -11,3 +11,5 @@ from .get_data import get_data # Delete from .delete_dataset import delete_dataset from .delete_data import delete_data + +from .store_descriptive_metrics import store_descriptive_metrics diff --git a/cognee/tasks/storage/descriptive_metrics.py b/cognee/modules/data/methods/store_descriptive_metrics.py similarity index 50% rename from cognee/tasks/storage/descriptive_metrics.py rename to cognee/modules/data/methods/store_descriptive_metrics.py index f7a854e53..8e5c3b598 100644 --- a/cognee/tasks/storage/descriptive_metrics.py +++ 
b/cognee/modules/data/methods/store_descriptive_metrics.py @@ -1,5 +1,4 @@ from cognee.infrastructure.engine import DataPoint -from cognee.modules.data.processing.document_types import Document from cognee.infrastructure.databases.relational import get_relational_engine from sqlalchemy import select from sqlalchemy.sql import func @@ -24,25 +23,28 @@ async def fetch_token_count(db_engine) -> int: return token_count_sum -async def calculate_graph_metrics(graph_data): - nodes, edges = graph_data - graph_metrics = { - "num_nodes": len(nodes), - "num_edges": len(edges), - } - return graph_metrics - - async def store_descriptive_metrics(data_points: list[DataPoint]): db_engine = get_relational_engine() graph_engine = await get_graph_engine() - graph_data = await graph_engine.get_graph_data() + graph_metrics = await graph_engine.get_graph_metrics() - token_count_sum = await fetch_token_count(db_engine) - graph_metrics = await calculate_graph_metrics(graph_data) + async with db_engine.get_async_session() as session: + metrics = GraphMetrics( + id=uuid.uuid4(), + num_tokens=await fetch_token_count(db_engine), + num_nodes=graph_metrics["num_nodes"], + num_edges=graph_metrics["num_edges"], + mean_degree=graph_metrics["mean_degree"], + edge_density=graph_metrics["edge_density"], + num_connected_components=graph_metrics["num_connected_components"], + sizes_of_connected_components=graph_metrics["sizes_of_connected_components"], + num_selfloops=graph_metrics["num_selfloops"], + diameter=graph_metrics["diameter"], + avg_shortest_path_length=graph_metrics["avg_shortest_path_length"], + avg_clustering=graph_metrics["avg_clustering"], + ) - table_name = "graph_metrics_table" - metrics_dict = {"id": uuid.uuid4(), "num_tokens": token_count_sum} | graph_metrics + session.add(metrics) + await session.commit() - await db_engine.insert_data(table_name, metrics_dict) return data_points diff --git a/cognee/modules/data/models/GraphMetrics.py b/cognee/modules/data/models/GraphMetrics.py 
index 2103214c8..d86a2048b 100644 --- a/cognee/modules/data/models/GraphMetrics.py +++ b/cognee/modules/data/models/GraphMetrics.py @@ -7,7 +7,7 @@ from uuid import uuid4 class GraphMetrics(Base): - __tablename__ = "graph_metrics_table" + __tablename__ = "graph_metrics" # TODO: Change ID to reflect unique id of graph database id = Column(UUID, primary_key=True, default=uuid4) diff --git a/cognee/modules/data/models/__init__.py b/cognee/modules/data/models/__init__.py index bd5774f88..51d6ad1d5 100644 --- a/cognee/modules/data/models/__init__.py +++ b/cognee/modules/data/models/__init__.py @@ -1,3 +1,4 @@ from .Data import Data from .Dataset import Dataset from .DatasetData import DatasetData +from .GraphMetrics import GraphMetrics