fix: search without prior cognify (#1548)
<!-- .github/pull_request_template.md --> ## Description <!-- Please provide a clear, human-generated description of the changes in this PR. DO NOT use AI-generated descriptions. We want to understand your thought process and reasoning. --> Running search after `cognee.add()` has been called, but before `cognee.cognify()` has been run, goes through the whole search operation only to fail with a cryptic error: ``` Error during graph projection: EntityNotFoundError: Empty graph projected from the database. (Status code: 404) ``` ## How to reproduce Modify `dynamic_steps_example.py` so that it does not run cognify ## This PR Checks whether the graph is empty before searching and, if it is, logs an informative warning and returns an empty result, making it clear that cognify has not been run yet | Logs Before | Logs After | |--------------|------------| | `Error during graph projection: EntityNotFoundError: Empty graph projected from the database. (Status code: 404)` | `2025-10-17T11:05:58.465315 [warning ] Search attempt on an empty knowledge graph [cognee.shared.logging_utils]` | ## Type of Change <!-- Please check the relevant option --> - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) <!-- Add screenshots or videos to help explain your changes --> ## Pre-submission Checklist <!-- Please check all boxes that apply before submitting your PR --> - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing 
PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
commit
9d0261f375
7 changed files with 91 additions and 9 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from typing import Union, Optional, List, Type
|
from typing import Union, Optional, List, Type
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.modules.engine.models.node_set import NodeSet
|
from cognee.modules.engine.models.node_set import NodeSet
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
|
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
|
||||||
|
|
@ -8,6 +9,9 @@ from cognee.modules.users.methods import get_default_user
|
||||||
from cognee.modules.search.methods import search as search_function
|
from cognee.modules.search.methods import search as search_function
|
||||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||||
from cognee.modules.data.exceptions import DatasetNotFoundError
|
from cognee.modules.data.exceptions import DatasetNotFoundError
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
async def search(
|
async def search(
|
||||||
|
|
@ -175,6 +179,13 @@ async def search(
|
||||||
if not datasets:
|
if not datasets:
|
||||||
raise DatasetNotFoundError(message="No datasets found.")
|
raise DatasetNotFoundError(message="No datasets found.")
|
||||||
|
|
||||||
|
graph_engine = await get_graph_engine()
|
||||||
|
is_empty = await graph_engine.is_empty()
|
||||||
|
|
||||||
|
if is_empty:
|
||||||
|
logger.warning("Search attempt on an empty knowledge graph")
|
||||||
|
return []
|
||||||
|
|
||||||
filtered_search_results = await search_function(
|
filtered_search_results = await search_function(
|
||||||
query_text=query_text,
|
query_text=query_text,
|
||||||
query_type=query_type,
|
query_type=query_type,
|
||||||
|
|
|
||||||
|
|
@ -159,6 +159,11 @@ class GraphDBInterface(ABC):
|
||||||
- get_connections
|
- get_connections
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Abstract coroutine on the graph interface: reports, as a bool, whether the
# graph is empty. Concrete adapters are expected to override it.
# NOTE(review): `logger` is not defined in the visible part of this module —
# confirm it is in scope before relying on the fallback body.
@abstractmethod
|
||||||
|
async def is_empty(self) -> bool:
|
||||||
|
# Fallback body: warn that no real emptiness check exists for this adapter...
logger.warning("is_empty() is not implemented")
|
||||||
|
# ...and report the graph as empty.
return True
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def query(self, query: str, params: dict) -> List[Any]:
|
async def query(self, query: str, params: dict) -> List[Any]:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -198,6 +198,15 @@ class KuzuAdapter(GraphDBInterface):
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
|
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
|
||||||
|
|
||||||
|
# Report whether the Kuzu graph contains no nodes.
# Returns True when the probe query yields no rows, False otherwise.
async def is_empty(self) -> bool:
|
||||||
|
# Probe query: match any node, return a constant, and stop after one row
# (LIMIT 1).
query = """
|
||||||
|
MATCH (n)
|
||||||
|
RETURN true
|
||||||
|
LIMIT 1;
|
||||||
|
"""
|
||||||
|
# Executed without parameters through this adapter's own `query` coroutine.
query_result = await self.query(query)
|
||||||
|
# No rows back means no node matched, i.e. the graph is empty.
return len(query_result) == 0
|
||||||
|
|
||||||
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
|
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
|
||||||
"""
|
"""
|
||||||
Execute a Kuzu query asynchronously with automatic reconnection.
|
Execute a Kuzu query asynchronously with automatic reconnection.
|
||||||
|
|
|
||||||
|
|
@ -87,6 +87,15 @@ class Neo4jAdapter(GraphDBInterface):
|
||||||
async with self.driver.session(database=self.graph_database_name) as session:
|
async with self.driver.session(database=self.graph_database_name) as session:
|
||||||
yield session
|
yield session
|
||||||
|
|
||||||
|
# Report whether the Neo4j graph contains no nodes.
# Returns the negation of the `node_exists` value produced by the probe query.
async def is_empty(self) -> bool:
|
||||||
|
# Probe query: the result of EXISTS { MATCH (n) } is returned under the
# alias `node_exists`.
query = """
|
||||||
|
RETURN EXISTS {
|
||||||
|
MATCH (n)
|
||||||
|
} AS node_exists;
|
||||||
|
"""
|
||||||
|
query_result = await self.query(query)
|
||||||
|
# Reads `node_exists` from the first returned row and negates it.
# NOTE(review): assumes `query` always yields at least one row that is
# indexable by column name — confirm against the `query` implementation.
return not query_result[0]["node_exists"]
|
||||||
|
|
||||||
@deadlock_retry()
|
@deadlock_retry()
|
||||||
async def query(
|
async def query(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
|
|
@ -47,10 +47,26 @@ async def main():
|
||||||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
|
||||||
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
|
is_empty = await graph_engine.is_empty()
|
||||||
|
|
||||||
|
assert is_empty, "Kuzu graph database is not empty"
|
||||||
|
|
||||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||||
|
|
||||||
|
is_empty = await graph_engine.is_empty()
|
||||||
|
|
||||||
|
assert is_empty, "Kuzu graph database should be empty before cognify"
|
||||||
|
|
||||||
await cognee.cognify([dataset_name])
|
await cognee.cognify([dataset_name])
|
||||||
|
|
||||||
|
is_empty = await graph_engine.is_empty()
|
||||||
|
|
||||||
|
assert not is_empty, "Kuzu graph database should not be empty"
|
||||||
|
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
|
||||||
vector_engine = get_vector_engine()
|
vector_engine = get_vector_engine()
|
||||||
|
|
@ -114,11 +130,10 @@ async def main():
|
||||||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||||
|
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
||||||
|
|
||||||
graph_engine = await get_graph_engine()
|
is_empty = await graph_engine.is_empty()
|
||||||
nodes, edges = await graph_engine.get_graph_data()
|
|
||||||
assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty"
|
assert is_empty, "Kuzu graph database is not empty"
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Ensure cleanup even if tests fail
|
# Ensure cleanup even if tests fail
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,14 @@ async def main():
|
||||||
explanation_file_path_nlp = os.path.join(
|
explanation_file_path_nlp = os.path.join(
|
||||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
||||||
)
|
)
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
|
||||||
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
|
is_empty = await graph_engine.is_empty()
|
||||||
|
|
||||||
|
assert is_empty, "Graph has to be empty"
|
||||||
|
|
||||||
await cognee.add([explanation_file_path_nlp], dataset_name)
|
await cognee.add([explanation_file_path_nlp], dataset_name)
|
||||||
|
|
||||||
explanation_file_path_quantum = os.path.join(
|
explanation_file_path_quantum = os.path.join(
|
||||||
|
|
@ -42,9 +50,16 @@ async def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||||
|
is_empty = await graph_engine.is_empty()
|
||||||
|
|
||||||
|
assert is_empty, "Graph has to be empty before cognify"
|
||||||
|
|
||||||
await cognee.cognify([dataset_name])
|
await cognee.cognify([dataset_name])
|
||||||
|
|
||||||
|
is_empty = await graph_engine.is_empty()
|
||||||
|
|
||||||
|
assert not is_empty, "Graph shouldn't be empty"
|
||||||
|
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
|
||||||
vector_engine = get_vector_engine()
|
vector_engine = get_vector_engine()
|
||||||
|
|
@ -117,11 +132,8 @@ async def main():
|
||||||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||||
|
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
is_empty = await graph_engine.is_empty()
|
||||||
|
assert is_empty, "Neo4j graph database is not empty"
|
||||||
graph_engine = await get_graph_engine()
|
|
||||||
nodes, edges = await graph_engine.get_graph_data()
|
|
||||||
assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty"
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
21
cognee/tests/unit/api/test_search.py
Normal file
21
cognee/tests/unit/api/test_search.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
import pytest
|
||||||
|
import cognee
|
||||||
|
|
||||||
|
|
||||||
|
# Searching after `add` but without `cognify` is expected to return an empty list.
# NOTE(review): the test name mentions SearchOnEmptyGraphError, but the body
# asserts on an empty result and expects no exception — consider renaming.
@pytest.mark.asyncio
|
||||||
|
async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph():
|
||||||
|
# Reset stored data and system state so the test starts from a clean slate.
await cognee.prune.prune_data()
|
||||||
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
# Add input but deliberately skip cognify.
await cognee.add("Sample input")
|
||||||
|
result = await cognee.search("Sample query")
|
||||||
|
# The search result must be exactly an empty list.
assert result == []
|
||||||
|
|
||||||
|
|
||||||
|
# Searching after both `add` and `cognify` is expected to return a non-empty result.
# NOTE(review): the test name mentions SearchOnEmptyGraphError, but the body
# only checks that the result differs from an empty list — consider renaming.
@pytest.mark.asyncio
|
||||||
|
async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError():
|
||||||
|
# Reset stored data and system state so the test starts from a clean slate.
await cognee.prune.prune_data()
|
||||||
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
await cognee.add("Sample input")
|
||||||
|
# Run cognify before searching, unlike the empty-graph test.
await cognee.cognify()
|
||||||
|
result = await cognee.search("Sample query")
|
||||||
|
# Anything other than an empty list passes.
assert result != []
|
||||||
Loading…
Add table
Reference in a new issue