Refactor the graph emptiness check to return a boolean (is_empty) instead of a node count, and optimize the underlying query

This commit is contained in:
Daulet Amirkhanov 2025-10-17 12:01:06 +01:00
parent 2a6256634e
commit 3ee50c192f
8 changed files with 31 additions and 42 deletions

View file

@ -8,7 +8,7 @@ from cognee.modules.search.types import SearchResult, SearchType, CombinedSearch
from cognee.modules.users.methods import get_default_user from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError, SearchOnEmptyGraphError from cognee.modules.data.exceptions import DatasetNotFoundError
async def search( async def search(
@ -177,12 +177,10 @@ async def search(
raise DatasetNotFoundError(message="No datasets found.") raise DatasetNotFoundError(message="No datasets found.")
graph_engine = await get_graph_engine() graph_engine = await get_graph_engine()
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
if nodes_count == 0: if is_empty:
raise SearchOnEmptyGraphError( return []
message="Knowledge graph is empty, please ensure data is added and cognified."
)
filtered_search_results = await search_function( filtered_search_results = await search_function(
query_text=query_text, query_text=query_text,

View file

@ -160,9 +160,9 @@ class GraphDBInterface(ABC):
""" """
@abstractmethod @abstractmethod
async def count_nodes(self) -> int: async def is_empty(self) -> bool:
logger.warning("count_nodes is not implemented") logger.warning("is_empty() is not implemented")
return 1 # dummy value to not fail search() return True
@abstractmethod @abstractmethod
async def query(self, query: str, params: dict) -> List[Any]: async def query(self, query: str, params: dict) -> List[Any]:

View file

@ -185,13 +185,14 @@ class KuzuAdapter(GraphDBInterface):
except FileNotFoundError: except FileNotFoundError:
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}") logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
async def count_nodes(self) -> int: async def is_empty(self) -> bool:
query = """ query = """
MATCH (n) MATCH (n)
RETURN COUNT(n); RETURN true
LIMIT 1;
""" """
query_result = await self.query(query) query_result = await self.query(query)
return query_result[0][0] return len(query_result) == 0
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]: async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
""" """

View file

@ -87,13 +87,14 @@ class Neo4jAdapter(GraphDBInterface):
async with self.driver.session(database=self.graph_database_name) as session: async with self.driver.session(database=self.graph_database_name) as session:
yield session yield session
async def count_nodes(self) -> int: async def is_empty(self) -> bool:
query = """ query = """
RETURN EXISTS {
MATCH (n) MATCH (n)
RETURN COUNT(n) as total_nodes; } AS node_exists;
""" """
query_result = await self.query(query) query_result = await self.query(query)
return query_result[0]["total_nodes"] return not query_result[0]["node_exists"]
@deadlock_retry() @deadlock_retry()
async def query( async def query(

View file

@ -9,5 +9,4 @@ from .exceptions import (
UnauthorizedDataAccessError, UnauthorizedDataAccessError,
DatasetNotFoundError, DatasetNotFoundError,
DatasetTypeError, DatasetTypeError,
SearchOnEmptyGraphError,
) )

View file

@ -35,16 +35,6 @@ class DatasetNotFoundError(CogneeValidationError):
super().__init__(message, name, status_code) super().__init__(message, name, status_code)
class SearchOnEmptyGraphError(CogneeValidationError):
def __init__(
self,
message: str = "Knowledge graph is empty, please ensure data is added and cognified.",
name: str = "SearchOnEmptyGraphError",
status_code=status.HTTP_400_BAD_REQUEST,
):
super().__init__(message, name, status_code)
class DatasetTypeError(CogneeValidationError): class DatasetTypeError(CogneeValidationError):
def __init__( def __init__(
self, self,

View file

@ -51,21 +51,21 @@ async def main():
graph_engine = await get_graph_engine() graph_engine = await get_graph_engine()
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Kuzu graph database is not empty" assert is_empty, "Kuzu graph database is not empty"
await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.add([explanation_file_path_quantum], dataset_name)
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Kuzu graph database should be empty before cognify" assert is_empty, "Kuzu graph database should be empty before cognify"
await cognee.cognify([dataset_name]) await cognee.cognify([dataset_name])
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count != 0, "Kuzu graph database should not be empty" assert not is_empty, "Kuzu graph database should not be empty"
from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.vector import get_vector_engine
@ -131,9 +131,9 @@ async def main():
await cognee.prune.prune_system(metadata=True) await cognee.prune.prune_system(metadata=True)
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Kuzu graph database is not empty" assert is_empty, "Kuzu graph database is not empty"
finally: finally:
# Ensure cleanup even if tests fail # Ensure cleanup even if tests fail

View file

@ -39,9 +39,9 @@ async def main():
graph_engine = await get_graph_engine() graph_engine = await get_graph_engine()
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Graph has to be empty" assert is_empty, "Graph has to be empty"
await cognee.add([explanation_file_path_nlp], dataset_name) await cognee.add([explanation_file_path_nlp], dataset_name)
@ -50,15 +50,15 @@ async def main():
) )
await cognee.add([explanation_file_path_quantum], dataset_name) await cognee.add([explanation_file_path_quantum], dataset_name)
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Graph has to be empty before cognify" assert is_empty, "Graph has to be empty before cognify"
await cognee.cognify([dataset_name]) await cognee.cognify([dataset_name])
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count != 0, "Graph shouldn't be empty" assert not is_empty, "Graph shouldn't be empty"
from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.vector import get_vector_engine
@ -132,8 +132,8 @@ async def main():
assert not os.path.isdir(data_root_directory), "Local data files are not deleted" assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
await cognee.prune.prune_system(metadata=True) await cognee.prune.prune_system(metadata=True)
nodes_count = await graph_engine.count_nodes() is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Neo4j graph database is not empty" assert is_empty, "Neo4j graph database is not empty"
if __name__ == "__main__": if __name__ == "__main__":