From 8692cd13381204a98100fb296bf971ad14ea7a32 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:03:17 +0100 Subject: [PATCH 01/11] feat: add count_nodes and count_edges methods to GraphDBInterface --- .../infrastructure/databases/graph/graph_db_interface.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index 65afdf275..abfdff784 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -159,6 +159,14 @@ class GraphDBInterface(ABC): - get_connections """ + @abstractmethod + async def count_nodes(self) -> int: + raise NotImplementedError + + @abstractmethod + async def count_edges(self) -> int: + raise NotImplementedError + @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: """ From f3ec1801025eb5cc1c2dc899a8aa3eca02ae4165 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:39:25 +0100 Subject: [PATCH 02/11] Implement count_edges and count_nodes for Kuzu --- .../databases/graph/kuzu/adapter.py | 16 ++++++++++ cognee/tests/test_kuzu.py | 29 ++++++++++++++++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index 7b772097f..a31726c9a 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -185,6 +185,22 @@ class KuzuAdapter(GraphDBInterface): except FileNotFoundError: logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") + async def count_edges(self) -> int: + query = """ + MATCH ()-[r]->() + RETURN COUNT(r); + """ + query_result = await self.query(query) + return query_result[0][0] + + async def count_nodes(self) -> int: + query = """ + MATCH (n) + RETURN COUNT(n); + """ 
+ query_result = await self.query(query) + return query_result[0][0] + async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]: """ Execute a Kuzu query asynchronously with automatic reconnection. diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index 8749e42d0..e39edd06a 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -47,10 +47,31 @@ async def main(): pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt" ) + from cognee.infrastructure.databases.graph import get_graph_engine + + graph_engine = await get_graph_engine() + + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" + await cognee.add([explanation_file_path_quantum], dataset_name) + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, ( + "Kuzu graph database should be empty before cognify" + ) + await cognee.cognify([dataset_name]) + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count != 0 and nodes_count != 0, "Kuzu graph database should not be empty" + from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() @@ -114,11 +135,11 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - from cognee.infrastructure.databases.graph import get_graph_engine - graph_engine = await get_graph_engine() - nodes, edges = await graph_engine.get_graph_data() - assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty" + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" 
finally: # Ensure cleanup even if tests fail From 9367fa5d03f42e3a1feb4d7d0de61cd1bb547fd0 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:39:48 +0100 Subject: [PATCH 03/11] Prior to search, check if knowledge graph is empty --- cognee/api/v1/search/search.py | 12 +++++++++++- cognee/modules/data/exceptions/__init__.py | 1 + cognee/modules/data/exceptions/exceptions.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 0a9e76e96..32035e612 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -1,13 +1,14 @@ from uuid import UUID from typing import Union, Optional, List, Type +from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.users.models import User from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult from cognee.modules.users.methods import get_default_user from cognee.modules.search.methods import search as search_function from cognee.modules.data.methods import get_authorized_existing_datasets -from cognee.modules.data.exceptions import DatasetNotFoundError +from cognee.modules.data.exceptions import DatasetNotFoundError, SearchOnEmptyGraphError async def search( @@ -175,6 +176,15 @@ async def search( if not datasets: raise DatasetNotFoundError(message="No datasets found.") + graph_engine = await get_graph_engine() + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + if nodes_count == 0 or edges_count == 0: + raise SearchOnEmptyGraphError( + message="Knowledge graph is empty, please ensure data is added and cognified." 
+ ) + filtered_search_results = await search_function( query_text=query_text, query_type=query_type, diff --git a/cognee/modules/data/exceptions/__init__.py b/cognee/modules/data/exceptions/__init__.py index 54af81070..ba943634d 100644 --- a/cognee/modules/data/exceptions/__init__.py +++ b/cognee/modules/data/exceptions/__init__.py @@ -9,4 +9,5 @@ from .exceptions import ( UnauthorizedDataAccessError, DatasetNotFoundError, DatasetTypeError, + SearchOnEmptyGraphError, ) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index ac3b68e64..c2921750a 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -35,6 +35,16 @@ class DatasetNotFoundError(CogneeValidationError): super().__init__(message, name, status_code) +class SearchOnEmptyGraphError(CogneeValidationError): + def __init__( + self, + message: str = "Knowledge graph is empty, please ensure data is added and cognified.", + name: str = "SearchOnEmptyGraphError", + status_code=status.HTTP_400_BAD_REQUEST, + ): + super().__init__(message, name, status_code) + + class DatasetTypeError(CogneeValidationError): def __init__( self, From ea4a93efb172a82754a342084aa95393a0f11759 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 16:57:53 +0100 Subject: [PATCH 04/11] Implement count_nodes and count_edges methods for Neo4j --- .../databases/graph/neo4j_driver/adapter.py | 16 +++++++++++ cognee/tests/test_neo4j.py | 27 +++++++++++++++---- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index 520295ed2..a61ab6f0b 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -87,6 +87,22 @@ class Neo4jAdapter(GraphDBInterface): async with 
self.driver.session(database=self.graph_database_name) as session: yield session + async def count_edges(self) -> int: + query = """ + MATCH ()-[r]->() + RETURN COUNT(r) as total_edges; + """ + query_result = await self.query(query) + return query_result[0]["total_edges"] + + async def count_nodes(self) -> int: + query = """ + MATCH (n) + RETURN COUNT(n) as total_nodes; + """ + query_result = await self.query(query) + return query_result[0]["total_nodes"] + @deadlock_retry() async def query( self, diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index c74b4ab65..11f6156bd 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -35,6 +35,15 @@ async def main(): explanation_file_path_nlp = os.path.join( pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" ) + from cognee.infrastructure.databases.graph import get_graph_engine + + graph_engine = await get_graph_engine() + + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Graph has to be empty" + await cognee.add([explanation_file_path_nlp], dataset_name) explanation_file_path_quantum = os.path.join( @@ -43,8 +52,18 @@ async def main(): await cognee.add([explanation_file_path_quantum], dataset_name) + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count == 0 and nodes_count == 0, "Graph has to be empty before cognify" + await cognee.cognify([dataset_name]) + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + + assert edges_count != 0 and nodes_count != 0, "Graph shouldn't be empty" + from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() @@ -117,11 +136,9 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) 
- from cognee.infrastructure.databases.graph import get_graph_engine - - graph_engine = await get_graph_engine() - nodes, edges = await graph_engine.get_graph_data() - assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty" + edges_count = await graph_engine.count_edges() + nodes_count = await graph_engine.count_nodes() + assert nodes_count == 0 and edges_count == 0, "Neo4j graph database is not empty" if __name__ == "__main__": From dede5fa6fdc5c42e6ad36826c72f8c62d91eacae Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:09:13 +0100 Subject: [PATCH 05/11] add unit tests for empty graph check on search --- cognee/tests/unit/api/test_search.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 cognee/tests/unit/api/test_search.py diff --git a/cognee/tests/unit/api/test_search.py b/cognee/tests/unit/api/test_search.py new file mode 100644 index 000000000..aff9e5d38 --- /dev/null +++ b/cognee/tests/unit/api/test_search.py @@ -0,0 +1,23 @@ +import pytest +import cognee +from cognee.modules.data.exceptions import SearchOnEmptyGraphError + + +@pytest.mark.asyncio +async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await cognee.add("Sample input") + with pytest.raises(SearchOnEmptyGraphError): + await cognee.search("Sample query") + + +async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await cognee.add("Sample input") + await cognee.cognify() + try: + await cognee.search("Sample query") + except SearchOnEmptyGraphError: + pytest.fail("Should not raise SearchOnEmptyGraphError when data was added and cognified") From 9e38a30c4945e1d5f3596550bd32ab26463cca03 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:20:45 +0100 Subject: [PATCH 06/11] refactor: keep only 
count_nodes --- cognee/api/v1/search/search.py | 3 +-- .../databases/graph/graph_db_interface.py | 4 ---- .../infrastructure/databases/graph/kuzu/adapter.py | 8 -------- .../databases/graph/neo4j_driver/adapter.py | 8 -------- cognee/tests/test_kuzu.py | 14 ++++---------- cognee/tests/test_neo4j.py | 13 ++++--------- examples/python/dynamic_steps_example.py | 2 +- 7 files changed, 10 insertions(+), 42 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 32035e612..880a57b99 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -177,10 +177,9 @@ async def search( raise DatasetNotFoundError(message="No datasets found.") graph_engine = await get_graph_engine() - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - if nodes_count == 0 or edges_count == 0: + if nodes_count == 0: raise SearchOnEmptyGraphError( message="Knowledge graph is empty, please ensure data is added and cognified." 
) diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index abfdff784..a4542cefe 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -163,10 +163,6 @@ class GraphDBInterface(ABC): async def count_nodes(self) -> int: raise NotImplementedError - @abstractmethod - async def count_edges(self) -> int: - raise NotImplementedError - @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: """ diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index a31726c9a..04c163efa 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -185,14 +185,6 @@ class KuzuAdapter(GraphDBInterface): except FileNotFoundError: logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") - async def count_edges(self) -> int: - query = """ - MATCH ()-[r]->() - RETURN COUNT(r); - """ - query_result = await self.query(query) - return query_result[0][0] - async def count_nodes(self) -> int: query = """ MATCH (n) diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index a61ab6f0b..ac19069f4 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -87,14 +87,6 @@ class Neo4jAdapter(GraphDBInterface): async with self.driver.session(database=self.graph_database_name) as session: yield session - async def count_edges(self) -> int: - query = """ - MATCH ()-[r]->() - RETURN COUNT(r) as total_edges; - """ - query_result = await self.query(query) - return query_result[0]["total_edges"] - async def count_nodes(self) -> int: query = """ MATCH (n) diff --git a/cognee/tests/test_kuzu.py 
b/cognee/tests/test_kuzu.py index e39edd06a..c07a51104 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -51,26 +51,21 @@ async def main(): graph_engine = await get_graph_engine() - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" + assert nodes_count == 0, "Kuzu graph database is not empty" await cognee.add([explanation_file_path_quantum], dataset_name) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, ( - "Kuzu graph database should be empty before cognify" - ) + assert nodes_count == 0, "Kuzu graph database should be empty before cognify" await cognee.cognify([dataset_name]) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count != 0 and nodes_count != 0, "Kuzu graph database should not be empty" + assert nodes_count != 0, "Kuzu graph database should not be empty" from cognee.infrastructure.databases.vector import get_vector_engine @@ -136,10 +131,9 @@ async def main(): await cognee.prune.prune_system(metadata=True) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Kuzu graph database is not empty" + assert nodes_count == 0, "Kuzu graph database is not empty" finally: # Ensure cleanup even if tests fail diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 11f6156bd..6f1fcf975 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -39,10 +39,9 @@ async def main(): graph_engine = await get_graph_engine() - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Graph has to be empty" + assert nodes_count == 0, "Graph has to be empty" await 
cognee.add([explanation_file_path_nlp], dataset_name) @@ -51,18 +50,15 @@ async def main(): ) await cognee.add([explanation_file_path_quantum], dataset_name) - - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count == 0 and nodes_count == 0, "Graph has to be empty before cognify" + assert nodes_count == 0, "Graph has to be empty before cognify" await cognee.cognify([dataset_name]) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert edges_count != 0 and nodes_count != 0, "Graph shouldn't be empty" + assert nodes_count != 0, "Graph shouldn't be empty" from cognee.infrastructure.databases.vector import get_vector_engine @@ -136,9 +132,8 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - edges_count = await graph_engine.count_edges() nodes_count = await graph_engine.count_nodes() - assert nodes_count == 0 and edges_count == 0, "Neo4j graph database is not empty" + assert nodes_count == 0, "Neo4j graph database is not empty" if __name__ == "__main__": diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py index bce2ea8be..5ff68cecc 100644 --- a/examples/python/dynamic_steps_example.py +++ b/examples/python/dynamic_steps_example.py @@ -199,7 +199,7 @@ if __name__ == "__main__": "prune_data": rebuild_kg, "prune_system": rebuild_kg, "add_text": rebuild_kg, - "cognify": rebuild_kg, + "cognify": False, "graph_metrics": rebuild_kg, "retriever": retrieve, } From a854e4f42689d7c7fb567c6e4b62443fbb818b19 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:22:51 +0100 Subject: [PATCH 07/11] chore: update GraphDBInterface to not throw NotImplementedError for count_nodes() --- cognee/infrastructure/databases/graph/graph_db_interface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index a4542cefe..d7542eac6 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -161,7 +161,8 @@ class GraphDBInterface(ABC): @abstractmethod async def count_nodes(self) -> int: - raise NotImplementedError + logger.warning("count_nodes is not implemented") + return 1 # dummy value to not fail search() @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: From 2a6256634e2829a63e11f2c5de9f7d0ad7dac44f Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 15 Oct 2025 17:35:46 +0100 Subject: [PATCH 08/11] chore: revert temporary change to dynamic_steps_example.py --- examples/python/dynamic_steps_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py index 5ff68cecc..bce2ea8be 100644 --- a/examples/python/dynamic_steps_example.py +++ b/examples/python/dynamic_steps_example.py @@ -199,7 +199,7 @@ if __name__ == "__main__": "prune_data": rebuild_kg, "prune_system": rebuild_kg, "add_text": rebuild_kg, - "cognify": False, + "cognify": rebuild_kg, "graph_metrics": rebuild_kg, "retriever": retrieve, } From 3ee50c192f0b3469858e5caf4992e8cfd8901d36 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 12:01:06 +0100 Subject: [PATCH 09/11] refactor emptiness check to be boolean, and optimize query --- cognee/api/v1/search/search.py | 10 ++++------ .../databases/graph/graph_db_interface.py | 6 +++--- .../databases/graph/kuzu/adapter.py | 7 ++++--- .../databases/graph/neo4j_driver/adapter.py | 7 ++++--- cognee/modules/data/exceptions/__init__.py | 1 - cognee/modules/data/exceptions/exceptions.py | 10 ---------- cognee/tests/test_kuzu.py | 16 ++++++++-------- cognee/tests/test_neo4j.py | 16 ++++++++-------- 8 files 
changed, 31 insertions(+), 42 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 880a57b99..0caca619a 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -8,7 +8,7 @@ from cognee.modules.search.types import SearchResult, SearchType, CombinedSearch from cognee.modules.users.methods import get_default_user from cognee.modules.search.methods import search as search_function from cognee.modules.data.methods import get_authorized_existing_datasets -from cognee.modules.data.exceptions import DatasetNotFoundError, SearchOnEmptyGraphError +from cognee.modules.data.exceptions import DatasetNotFoundError async def search( @@ -177,12 +177,10 @@ async def search( raise DatasetNotFoundError(message="No datasets found.") graph_engine = await get_graph_engine() - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - if nodes_count == 0: - raise SearchOnEmptyGraphError( - message="Knowledge graph is empty, please ensure data is added and cognified." 
- ) + if is_empty: + return [] filtered_search_results = await search_function( query_text=query_text, diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index d7542eac6..67df1a27c 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -160,9 +160,9 @@ class GraphDBInterface(ABC): """ @abstractmethod - async def count_nodes(self) -> int: - logger.warning("count_nodes is not implemented") - return 1 # dummy value to not fail search() + async def is_empty(self) -> bool: + logger.warning("is_empty() is not implemented") + return True @abstractmethod async def query(self, query: str, params: dict) -> List[Any]: diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index 04c163efa..29ff92247 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -185,13 +185,14 @@ class KuzuAdapter(GraphDBInterface): except FileNotFoundError: logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") - async def count_nodes(self) -> int: + async def is_empty(self) -> bool: query = """ MATCH (n) - RETURN COUNT(n); + RETURN true + LIMIT 1; """ query_result = await self.query(query) - return query_result[0][0] + return len(query_result) == 0 async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]: """ diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index ac19069f4..5861b69cb 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -87,13 +87,14 @@ class Neo4jAdapter(GraphDBInterface): async with self.driver.session(database=self.graph_database_name) as session: yield 
session - async def count_nodes(self) -> int: + async def is_empty(self) -> bool: query = """ + RETURN EXISTS { MATCH (n) - RETURN COUNT(n) as total_nodes; + } AS node_exists; """ query_result = await self.query(query) - return query_result[0]["total_nodes"] + return not query_result[0]["node_exists"] @deadlock_retry() async def query( diff --git a/cognee/modules/data/exceptions/__init__.py b/cognee/modules/data/exceptions/__init__.py index ba943634d..54af81070 100644 --- a/cognee/modules/data/exceptions/__init__.py +++ b/cognee/modules/data/exceptions/__init__.py @@ -9,5 +9,4 @@ from .exceptions import ( UnauthorizedDataAccessError, DatasetNotFoundError, DatasetTypeError, - SearchOnEmptyGraphError, ) diff --git a/cognee/modules/data/exceptions/exceptions.py b/cognee/modules/data/exceptions/exceptions.py index c2921750a..ac3b68e64 100644 --- a/cognee/modules/data/exceptions/exceptions.py +++ b/cognee/modules/data/exceptions/exceptions.py @@ -35,16 +35,6 @@ class DatasetNotFoundError(CogneeValidationError): super().__init__(message, name, status_code) -class SearchOnEmptyGraphError(CogneeValidationError): - def __init__( - self, - message: str = "Knowledge graph is empty, please ensure data is added and cognified.", - name: str = "SearchOnEmptyGraphError", - status_code=status.HTTP_400_BAD_REQUEST, - ): - super().__init__(message, name, status_code) - - class DatasetTypeError(CogneeValidationError): def __init__( self, diff --git a/cognee/tests/test_kuzu.py b/cognee/tests/test_kuzu.py index c07a51104..fe9da6dcb 100644 --- a/cognee/tests/test_kuzu.py +++ b/cognee/tests/test_kuzu.py @@ -51,21 +51,21 @@ async def main(): graph_engine = await get_graph_engine() - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Kuzu graph database is not empty" + assert is_empty, "Kuzu graph database is not empty" await cognee.add([explanation_file_path_quantum], dataset_name) - nodes_count = await 
graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Kuzu graph database should be empty before cognify" + assert is_empty, "Kuzu graph database should be empty before cognify" await cognee.cognify([dataset_name]) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count != 0, "Kuzu graph database should not be empty" + assert not is_empty, "Kuzu graph database should not be empty" from cognee.infrastructure.databases.vector import get_vector_engine @@ -131,9 +131,9 @@ async def main(): await cognee.prune.prune_system(metadata=True) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Kuzu graph database is not empty" + assert is_empty, "Kuzu graph database is not empty" finally: # Ensure cleanup even if tests fail diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 6f1fcf975..925614e67 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -39,9 +39,9 @@ async def main(): graph_engine = await get_graph_engine() - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Graph has to be empty" + assert is_empty, "Graph has to be empty" await cognee.add([explanation_file_path_nlp], dataset_name) @@ -50,15 +50,15 @@ async def main(): ) await cognee.add([explanation_file_path_quantum], dataset_name) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count == 0, "Graph has to be empty before cognify" + assert is_empty, "Graph has to be empty before cognify" await cognee.cognify([dataset_name]) - nodes_count = await graph_engine.count_nodes() + is_empty = await graph_engine.is_empty() - assert nodes_count != 0, "Graph shouldn't be empty" + assert not is_empty, "Graph shouldn't be empty" from cognee.infrastructure.databases.vector import 
get_vector_engine @@ -132,8 +132,8 @@ async def main(): assert not os.path.isdir(data_root_directory), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) - nodes_count = await graph_engine.count_nodes() - assert nodes_count == 0, "Neo4j graph database is not empty" + is_empty = await graph_engine.is_empty() + assert is_empty, "Neo4j graph database is not empty" if __name__ == "__main__": From c313fcd02924eff3a08a8129b3b3b14f93f67ca0 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 12:06:35 +0100 Subject: [PATCH 10/11] log warning on attempts to search on an empty knowledge graph --- cognee/api/v1/search/search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 0caca619a..9f158e9d0 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -9,6 +9,9 @@ from cognee.modules.users.methods import get_default_user from cognee.modules.search.methods import search as search_function from cognee.modules.data.methods import get_authorized_existing_datasets from cognee.modules.data.exceptions import DatasetNotFoundError +from cognee.shared.logging_utils import get_logger + +logger = get_logger() async def search( @@ -180,6 +183,7 @@ async def search( is_empty = await graph_engine.is_empty() if is_empty: + logger.warning("Search attempt on an empty knowledge graph") return [] filtered_search_results = await search_function( From 4e2a7778600bcea3992dbec4466939022e9b53c8 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 14:18:44 +0100 Subject: [PATCH 11/11] tests: update tests after last refactoring --- cognee/tests/unit/api/test_search.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/cognee/tests/unit/api/test_search.py b/cognee/tests/unit/api/test_search.py index aff9e5d38..54a4cc35f 100644 --- a/cognee/tests/unit/api/test_search.py +++ 
b/cognee/tests/unit/api/test_search.py @@ -1,6 +1,5 @@ import pytest import cognee -from cognee.modules.data.exceptions import SearchOnEmptyGraphError @pytest.mark.asyncio @@ -8,16 +7,15 @@ async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) await cognee.add("Sample input") - with pytest.raises(SearchOnEmptyGraphError): - await cognee.search("Sample query") + result = await cognee.search("Sample query") + assert result == [] +@pytest.mark.asyncio async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) await cognee.add("Sample input") await cognee.cognify() - try: - await cognee.search("Sample query") - except SearchOnEmptyGraphError: - pytest.fail("Should not raise SearchOnEmptyGraphError when data was added and cognified") + result = await cognee.search("Sample query") + assert result != []