Refactor the graph emptiness check to return a boolean, and optimize the underlying node-existence query

This commit is contained in:
Daulet Amirkhanov 2025-10-17 12:01:06 +01:00
parent 2a6256634e
commit 3ee50c192f
8 changed files with 31 additions and 42 deletions

View file

@ -8,7 +8,7 @@ from cognee.modules.search.types import SearchResult, SearchType, CombinedSearch
from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError, SearchOnEmptyGraphError
from cognee.modules.data.exceptions import DatasetNotFoundError
async def search(
@ -177,12 +177,10 @@ async def search(
raise DatasetNotFoundError(message="No datasets found.")
graph_engine = await get_graph_engine()
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
if nodes_count == 0:
raise SearchOnEmptyGraphError(
message="Knowledge graph is empty, please ensure data is added and cognified."
)
if is_empty:
return []
filtered_search_results = await search_function(
query_text=query_text,

View file

@ -160,9 +160,9 @@ class GraphDBInterface(ABC):
"""
@abstractmethod
async def count_nodes(self) -> int:
logger.warning("count_nodes is not implemented")
return 1 # dummy value to not fail search()
async def is_empty(self) -> bool:
logger.warning("is_empty() is not implemented")
return True
@abstractmethod
async def query(self, query: str, params: dict) -> List[Any]:

View file

@ -185,13 +185,14 @@ class KuzuAdapter(GraphDBInterface):
except FileNotFoundError:
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
async def count_nodes(self) -> int:
async def is_empty(self) -> bool:
query = """
MATCH (n)
RETURN COUNT(n);
RETURN true
LIMIT 1;
"""
query_result = await self.query(query)
return query_result[0][0]
return len(query_result) == 0
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
"""

View file

@ -87,13 +87,14 @@ class Neo4jAdapter(GraphDBInterface):
async with self.driver.session(database=self.graph_database_name) as session:
yield session
async def count_nodes(self) -> int:
async def is_empty(self) -> bool:
query = """
RETURN EXISTS {
MATCH (n)
RETURN COUNT(n) as total_nodes;
} AS node_exists;
"""
query_result = await self.query(query)
return query_result[0]["total_nodes"]
return not query_result[0]["node_exists"]
@deadlock_retry()
async def query(

View file

@ -9,5 +9,4 @@ from .exceptions import (
UnauthorizedDataAccessError,
DatasetNotFoundError,
DatasetTypeError,
SearchOnEmptyGraphError,
)

View file

@ -35,16 +35,6 @@ class DatasetNotFoundError(CogneeValidationError):
super().__init__(message, name, status_code)
class SearchOnEmptyGraphError(CogneeValidationError):
def __init__(
self,
message: str = "Knowledge graph is empty, please ensure data is added and cognified.",
name: str = "SearchOnEmptyGraphError",
status_code=status.HTTP_400_BAD_REQUEST,
):
super().__init__(message, name, status_code)
class DatasetTypeError(CogneeValidationError):
def __init__(
self,

View file

@ -51,21 +51,21 @@ async def main():
graph_engine = await get_graph_engine()
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Kuzu graph database is not empty"
assert is_empty, "Kuzu graph database is not empty"
await cognee.add([explanation_file_path_quantum], dataset_name)
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Kuzu graph database should be empty before cognify"
assert is_empty, "Kuzu graph database should be empty before cognify"
await cognee.cognify([dataset_name])
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
assert nodes_count != 0, "Kuzu graph database should not be empty"
assert not is_empty, "Kuzu graph database should not be empty"
from cognee.infrastructure.databases.vector import get_vector_engine
@ -131,9 +131,9 @@ async def main():
await cognee.prune.prune_system(metadata=True)
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Kuzu graph database is not empty"
assert is_empty, "Kuzu graph database is not empty"
finally:
# Ensure cleanup even if tests fail

View file

@ -39,9 +39,9 @@ async def main():
graph_engine = await get_graph_engine()
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Graph has to be empty"
assert is_empty, "Graph has to be empty"
await cognee.add([explanation_file_path_nlp], dataset_name)
@ -50,15 +50,15 @@ async def main():
)
await cognee.add([explanation_file_path_quantum], dataset_name)
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
assert nodes_count == 0, "Graph has to be empty before cognify"
assert is_empty, "Graph has to be empty before cognify"
await cognee.cognify([dataset_name])
nodes_count = await graph_engine.count_nodes()
is_empty = await graph_engine.is_empty()
assert nodes_count != 0, "Graph shouldn't be empty"
assert not is_empty, "Graph shouldn't be empty"
from cognee.infrastructure.databases.vector import get_vector_engine
@ -132,8 +132,8 @@ async def main():
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
await cognee.prune.prune_system(metadata=True)
nodes_count = await graph_engine.count_nodes()
assert nodes_count == 0, "Neo4j graph database is not empty"
is_empty = await graph_engine.is_empty()
assert is_empty, "Neo4j graph database is not empty"
if __name__ == "__main__":