cognee/cognee/tasks/storage/descriptive_metrics.py
alekszievr a79f7133fd
Feat: add number of tokens and descriptive graph metrics to metric table [COG-1132] (#481)
* Count the number of tokens in documents

* save token count to relational db

* Add metrics to metric table

* Store list as json instead of array in relational db table

* Sum in sql instead of python

* Unify naming

* Return data_points in descriptive metric calculation task

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
2025-01-30 12:39:14 +01:00

48 lines
1.5 KiB
Python

from cognee.infrastructure.engine import DataPoint
from cognee.modules.data.processing.document_types import Document
from cognee.infrastructure.databases.relational import get_relational_engine
from sqlalchemy import select
from sqlalchemy.sql import func
from cognee.modules.data.models import Data
from cognee.modules.data.models import GraphMetrics
import uuid
from cognee.infrastructure.databases.graph import get_graph_engine
async def fetch_token_count(db_engine) -> int:
    """
    Fetches and sums token counts from the database.

    Args:
        db_engine: Relational engine whose ``get_async_session()`` is usable
            as an async context manager yielding a session.

    Returns:
        int: The total number of tokens across all documents, or 0 when the
            aggregate is NULL (no rows, or no row with a token count).
    """
    async with db_engine.get_async_session() as session:
        result = await session.execute(select(func.sum(Data.token_count)))
        token_count_sum = result.scalar()

    # SUM() over zero rows (or only NULL values) yields NULL, which scalar()
    # surfaces as None; normalize it so the declared ``int`` return holds.
    return 0 if token_count_sum is None else token_count_sum
async def calculate_graph_metrics(graph_data):
    """
    Counts the nodes and edges of a graph.

    Args:
        graph_data: A two-item iterable ``(nodes, edges)``; each item must
            support ``len()``.

    Returns:
        dict: ``{"num_nodes": <node count>, "num_edges": <edge count>}``.
    """
    node_items, edge_items = graph_data
    return dict(num_nodes=len(node_items), num_edges=len(edge_items))
async def store_descriptive_metrics(data_points: list[DataPoint]):
    """
    Records one row of descriptive metrics in the relational database.

    The row holds a freshly generated UUID as ``id``, the summed token count
    as ``num_tokens``, and the node/edge counts of the current graph. It is
    inserted into the ``graph_metrics_table`` table.

    Args:
        data_points: Passed through untouched; not used to compute metrics.

    Returns:
        The same ``data_points`` object that was passed in.
    """
    relational_engine = get_relational_engine()
    graph_engine = await get_graph_engine()

    graph_snapshot = await graph_engine.get_graph_data()
    total_tokens = await fetch_token_count(relational_engine)
    size_metrics = await calculate_graph_metrics(graph_snapshot)

    # Graph metrics are merged last, matching the original union order.
    metrics_row = {"id": uuid.uuid4(), "num_tokens": total_tokens, **size_metrics}
    await relational_engine.insert_data("graph_metrics_table", metrics_row)

    return data_points