diff --git a/cognee/api/v1/datasets/datasets.py b/cognee/api/v1/datasets/datasets.py
index 3534132e0..7088531bc 100644
--- a/cognee/api/v1/datasets/datasets.py
+++ b/cognee/api/v1/datasets/datasets.py
@@ -1,4 +1,5 @@
 from uuid import UUID
+from cognee.modules.data.methods import has_dataset_data
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.ingestion import discover_directory_datasets
 from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
@@ -26,6 +27,27 @@ class datasets:
 
         return await get_dataset_data(dataset.id)
 
+    @staticmethod
+    async def has_data(dataset_id: str) -> bool:
+        """Report whether the default user's dataset has any data attached.
+
+        Looks the dataset up for the default user and returns True when at
+        least one data item is linked to it. Returns False when the lookup
+        yields no dataset.
+        """
+        from cognee.modules.data.methods import get_dataset
+
+        user = await get_default_user()
+
+        dataset = await get_dataset(user.id, dataset_id)
+
+        # NOTE(review): assumes get_dataset yields None for an unknown dataset;
+        # guard so the check answers False instead of failing on `.id`.
+        if dataset is None:
+            return False
+
+        return await has_dataset_data(dataset.id)
+
     @staticmethod
     async def get_status(dataset_ids: list[UUID]) -> dict:
         return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")
diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py
index d9716de95..83913085c 100644
--- a/cognee/modules/data/methods/__init__.py
+++ b/cognee/modules/data/methods/__init__.py
@@ -23,3 +23,6 @@ from .create_authorized_dataset import create_authorized_dataset
 
 # Check
 from .check_dataset_name import check_dataset_name
+
+# Boolean check
+from .has_dataset_data import has_dataset_data
diff --git a/cognee/modules/data/methods/has_dataset_data.py b/cognee/modules/data/methods/has_dataset_data.py
new file mode 100644
index 000000000..473da1e07
--- /dev/null
+++ b/cognee/modules/data/methods/has_dataset_data.py
@@ -0,0 +1,22 @@
+from uuid import UUID
+
+from sqlalchemy import select
+
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.data.models import DatasetData
+
+
+async def has_dataset_data(dataset_id: UUID) -> bool:
+    """Return True when at least one DatasetData row references ``dataset_id``."""
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        # Existence only needs one matching row; LIMIT 1 avoids counting them all.
+        probe_query = (
+            select(DatasetData.dataset_id)
+            .where(DatasetData.dataset_id == dataset_id)
+            .limit(1)
+        )
+        result = await session.execute(probe_query)
+
+        return result.first() is not None
diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py
index aeab21a02..29f50119c 100644
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@@ -338,7 +338,19 @@ async def search_in_datasets_context(
 
         if is_empty:
             # TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
-            logger.warning("Search attempt on an empty knowledge graph")
+            from cognee.modules.data.methods import get_dataset_data
+
+            dataset_data = await get_dataset_data(dataset.id)
+
+            if len(dataset_data) > 0:
+                logger.warning(
+                    f"Dataset '{dataset.name}' has {len(dataset_data)} data item(s) but the knowledge graph is empty. "
+                    "Please run cognify to process the data before searching."
+                )
+            else:
+                logger.warning(
+                    "Search attempt on an empty knowledge graph - no data has been added to this dataset"
+                )
 
         specific_search_tools = await get_search_type_tools(
             query_type=query_type,