diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py
index c727230d7..55792f2da 100644
--- a/cognee/api/v1/cognify/cognify_v2.py
+++ b/cognee/api/v1/cognify/cognify_v2.py
@@ -165,7 +165,7 @@ async def get_default_tasks(
                 task_config={"batch_size": 10},
             ),
             Task(add_data_points, task_config={"batch_size": 10}),
-            Task(store_descriptive_metrics),
+            Task(store_descriptive_metrics, include_optional=True),
         ]
     except Exception as error:
         send_telemetry("cognee.cognify DEFAULT TASKS CREATION ERRORED", user.id)
diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py
index dfb955cd7..345261438 100644
--- a/cognee/infrastructure/databases/graph/graph_db_interface.py
+++ b/cognee/infrastructure/databases/graph/graph_db_interface.py
@@ -56,5 +56,5 @@ class GraphDBInterface(Protocol):
         raise NotImplementedError
 
     @abstractmethod
-    async def get_graph_metrics(self):
+    async def get_graph_metrics(self, include_optional=False):
         raise NotImplementedError
diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
index 4f6f1180c..f2685e73d 100644
--- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
+++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@@ -531,7 +531,7 @@ class Neo4jAdapter(GraphDBInterface):
 
         return (nodes, edges)
 
-    async def get_graph_metrics(self):
+    async def get_graph_metrics(self, include_optional=False):
         return {
             "num_nodes": -1,
             "num_edges": -1,
diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py
index 018799a08..89dcca021 100644
--- a/cognee/infrastructure/databases/graph/networkx/adapter.py
+++ b/cognee/infrastructure/databases/graph/networkx/adapter.py
@@ -14,6 +14,7 @@ import networkx as nx
 from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
 from cognee.infrastructure.engine import DataPoint
 from cognee.modules.storage.utils import JSONEncoder
+import numpy as np
 
 logger = logging.getLogger("NetworkXAdapter")
 
@@ -386,16 +387,63 @@ class NetworkXAdapter(GraphDBInterface):
 
         return filtered_nodes, filtered_edges
 
-    async def get_graph_metrics(self):
-        return {
-            "num_nodes": -1,
-            "num_edges": -1,
-            "mean_degree": -1,
-            "edge_density": -1,
-            "num_connected_components": -1,
-            "sizes_of_connected_components": -1,
-            "num_selfloops": -1,
-            "diameter": -1,
-            "avg_shortest_path_length": -1,
-            "avg_clustering": -1,
-        }
+    async def get_graph_metrics(self, include_optional=False):
+        graph = self.graph
+
+        def _get_mean_degree(graph):
+            degrees = [d for _, d in graph.degree()]
+            return np.mean(degrees) if degrees else 0
+
+        def _get_edge_density(graph):
+            num_nodes = graph.number_of_nodes()
+            num_edges = graph.number_of_edges()
+            num_possible_edges = num_nodes * (num_nodes - 1)
+            edge_density = num_edges / num_possible_edges if num_possible_edges > 0 else 0
+            return edge_density
+
+        def _get_diameter(graph):
+            if graph.number_of_nodes() > 0 and nx.is_strongly_connected(graph):
+                return nx.diameter(graph.to_undirected())
+            else:
+                return None
+
+        def _get_avg_shortest_path_length(graph):
+            if graph.number_of_nodes() > 0 and nx.is_strongly_connected(graph):
+                return nx.average_shortest_path_length(graph)
+            else:
+                return None
+
+        def _get_avg_clustering(graph):
+            try:
+                return nx.average_clustering(nx.DiGraph(graph))
+            except Exception as e:
+                logger.warning("Failed to calculate clustering coefficient: %s", e)
+                return None
+
+        mandatory_metrics = {
+            "num_nodes": graph.number_of_nodes(),
+            "num_edges": graph.number_of_edges(),
+            "mean_degree": _get_mean_degree(graph),
+            "edge_density": _get_edge_density(graph),
+            "num_connected_components": nx.number_weakly_connected_components(graph),
+            "sizes_of_connected_components": [
+                len(c) for c in nx.weakly_connected_components(graph)
+            ],
+        }
+
+        if include_optional:
+            optional_metrics = {
+                "num_selfloops": sum(1 for u, v in graph.edges() if u == v),
+                "diameter": _get_diameter(graph),
+                "avg_shortest_path_length": _get_avg_shortest_path_length(graph),
+                "avg_clustering": _get_avg_clustering(graph),
+            }
+        else:
+            optional_metrics = {
+                "num_selfloops": -1,
+                "diameter": -1,
+                "avg_shortest_path_length": -1,
+                "avg_clustering": -1,
+            }
+
+        return mandatory_metrics | optional_metrics
diff --git a/cognee/modules/data/methods/store_descriptive_metrics.py b/cognee/modules/data/methods/store_descriptive_metrics.py
index 8e5c3b598..c39571d0a 100644
--- a/cognee/modules/data/methods/store_descriptive_metrics.py
+++ b/cognee/modules/data/methods/store_descriptive_metrics.py
@@ -23,10 +23,10 @@ async def fetch_token_count(db_engine) -> int:
 
     return token_count_sum
 
-async def store_descriptive_metrics(data_points: list[DataPoint]):
+async def store_descriptive_metrics(data_points: list[DataPoint], include_optional: bool = False):
     db_engine = get_relational_engine()
     graph_engine = await get_graph_engine()
-    graph_metrics = await graph_engine.get_graph_metrics()
+    graph_metrics = await graph_engine.get_graph_metrics(include_optional)
 
     async with db_engine.get_async_session() as session:
         metrics = GraphMetrics(
diff --git a/cognee/modules/data/models/GraphMetrics.py b/cognee/modules/data/models/GraphMetrics.py
index d86a2048b..5ce0d7ded 100644
--- a/cognee/modules/data/models/GraphMetrics.py
+++ b/cognee/modules/data/models/GraphMetrics.py
@@ -1,4 +1,5 @@
 from datetime import datetime, timezone
+from sqlalchemy.sql import func
 from sqlalchemy import Column, DateTime, Float, Integer, JSON, UUID