Add hint log for when data is added but not cognified

This commit is contained in:
Daulet Amirkhanov 2025-10-20 23:01:14 +01:00
parent ee7db762e6
commit e4cbbcbf51
4 changed files with 48 additions and 1 deletion

View file

@ -1,4 +1,5 @@
from uuid import UUID
from cognee.modules.data.methods import has_dataset_data
from cognee.modules.users.methods import get_default_user
from cognee.modules.ingestion import discover_directory_datasets
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
@ -26,6 +27,16 @@ class datasets:
return await get_dataset_data(dataset.id)
@staticmethod
async def has_data(dataset_id: str) -> bool:
    """Report whether the default user's dataset ``dataset_id`` holds any data.

    The dataset is looked up via ``get_dataset`` using the default user's id,
    and the resolved dataset's ``id`` is passed to ``has_dataset_data``.

    Args:
        dataset_id: Identifier of the dataset to inspect.

    Returns:
        The boolean produced by ``has_dataset_data`` for the resolved dataset.
    """
    # Resolved at call time rather than at module import.
    from cognee.modules.data.methods import get_dataset

    default_user = await get_default_user()
    target_dataset = await get_dataset(default_user.id, dataset_id)
    contains_data = await has_dataset_data(target_dataset.id)
    return contains_data
@staticmethod
async def get_status(dataset_ids: list[UUID]) -> dict:
    """Fetch the ``cognify_pipeline`` status for the given datasets.

    Args:
        dataset_ids: Ids of the datasets whose pipeline status is requested.

    Returns:
        Whatever ``get_pipeline_status`` returns for these ids.
    """
    pipeline_status = await get_pipeline_status(
        dataset_ids,
        pipeline_name="cognify_pipeline",
    )
    return pipeline_status

View file

@ -23,3 +23,6 @@ from .create_authorized_dataset import create_authorized_dataset
# Check
from .check_dataset_name import check_dataset_name
# Boolean check
from .has_dataset_data import has_dataset_data

View file

@ -0,0 +1,21 @@
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.sql import func
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models import DatasetData
async def has_dataset_data(dataset_id: UUID) -> bool:
    """Return whether at least one ``DatasetData`` row references ``dataset_id``.

    Only existence matters to callers, so the query fetches at most one
    matching row instead of counting every row linked to the dataset.

    Args:
        dataset_id: Id of the dataset to check.

    Returns:
        ``True`` if a ``DatasetData`` row with this ``dataset_id`` exists,
        ``False`` otherwise.
    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        # LIMIT 1 lets the database stop at the first match; a COUNT(*)
        # would have to visit every matching row just to be compared with 0.
        existence_query = (
            select(DatasetData.dataset_id)
            .where(DatasetData.dataset_id == dataset_id)
            .limit(1)
        )
        result = await session.execute(existence_query)
        # first() yields a Row when a match exists and None when there is none.
        return result.first() is not None

View file

@ -338,7 +338,19 @@ async def search_in_datasets_context(
if is_empty:
# TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
logger.warning("Search attempt on an empty knowledge graph")
from cognee.modules.data.methods import get_dataset_data
dataset_data = await get_dataset_data(dataset.id)
if len(dataset_data) > 0:
logger.warning(
f"Dataset '{dataset.name}' has {len(dataset_data)} data item(s) but the knowledge graph is empty. "
"Please run cognify to process the data before searching."
)
else:
logger.warning(
"Search attempt on an empty knowledge graph - no data has been added to this dataset"
)
specific_search_tools = await get_search_type_tools(
query_type=query_type,