refactor: Make graphs return optional in search (#1919)
<!-- .github/pull_request_template.md -->
## Description
- Have search results be more human readable by making graphs return
information optional
## Acceptance Criteria
<!--
* Key requirements to the new feature or modification;
* Proof that the changes work and meet the requirements;
* Include instructions on how to verify the changes. Describe how to
test it locally;
* Proof that it's sufficiently tested.
-->
## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):
## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->
## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages
## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit
* **New Features**
* Added an optional verbose mode to search. When enabled, results
include additional graph details; disabled by default for cleaner
responses.
* **Tests**
* Added unit tests verifying access-controlled search returns correctly
shaped results for both verbose and non-verbose modes, including
presence/absence of graph details.
<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
commit
4caac4b8f0
3 changed files with 129 additions and 18 deletions
|
|
@ -33,6 +33,7 @@ async def search(
|
|||
session_id: Optional[str] = None,
|
||||
wide_search_top_k: Optional[int] = 100,
|
||||
triplet_distance_penalty: Optional[float] = 3.5,
|
||||
verbose: bool = False,
|
||||
) -> Union[List[SearchResult], CombinedSearchResult]:
|
||||
"""
|
||||
Search and query the knowledge graph for insights, information, and connections.
|
||||
|
|
@ -123,6 +124,8 @@ async def search(
|
|||
|
||||
session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
|
||||
|
||||
verbose: If True, returns detailed result information including graph representation (when possible).
|
||||
|
||||
Returns:
|
||||
list: Search results in format determined by query_type:
|
||||
|
||||
|
|
@ -204,6 +207,7 @@ async def search(
|
|||
session_id=session_id,
|
||||
wide_search_top_k=wide_search_top_k,
|
||||
triplet_distance_penalty=triplet_distance_penalty,
|
||||
verbose=verbose,
|
||||
)
|
||||
|
||||
return filtered_search_results
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ async def search(
|
|||
session_id: Optional[str] = None,
|
||||
wide_search_top_k: Optional[int] = 100,
|
||||
triplet_distance_penalty: Optional[float] = 3.5,
|
||||
verbose: bool = False,
|
||||
) -> Union[CombinedSearchResult, List[SearchResult]]:
|
||||
"""
|
||||
|
||||
|
|
@ -140,6 +141,7 @@ async def search(
|
|||
)
|
||||
|
||||
if use_combined_context:
|
||||
# Note: combined context search must always be verbose and return a CombinedSearchResult with graphs info
|
||||
prepared_search_results = await prepare_search_result(
|
||||
search_results[0] if isinstance(search_results, list) else search_results
|
||||
)
|
||||
|
|
@ -173,25 +175,30 @@ async def search(
|
|||
datasets = prepared_search_results["datasets"]
|
||||
|
||||
if only_context:
|
||||
return_value.append(
|
||||
{
|
||||
"search_result": [context] if context else None,
|
||||
"dataset_id": datasets[0].id,
|
||||
"dataset_name": datasets[0].name,
|
||||
"dataset_tenant_id": datasets[0].tenant_id,
|
||||
"graphs": graphs,
|
||||
}
|
||||
)
|
||||
search_result_dict = {
|
||||
"search_result": [context] if context else None,
|
||||
"dataset_id": datasets[0].id,
|
||||
"dataset_name": datasets[0].name,
|
||||
"dataset_tenant_id": datasets[0].tenant_id,
|
||||
}
|
||||
if verbose:
|
||||
# Include graphs only in verbose mode
|
||||
search_result_dict["graphs"] = graphs
|
||||
|
||||
return_value.append(search_result_dict)
|
||||
else:
|
||||
return_value.append(
|
||||
{
|
||||
"search_result": [result] if result else None,
|
||||
"dataset_id": datasets[0].id,
|
||||
"dataset_name": datasets[0].name,
|
||||
"dataset_tenant_id": datasets[0].tenant_id,
|
||||
"graphs": graphs,
|
||||
}
|
||||
)
|
||||
search_result_dict = {
|
||||
"search_result": [result] if result else None,
|
||||
"dataset_id": datasets[0].id,
|
||||
"dataset_name": datasets[0].name,
|
||||
"dataset_tenant_id": datasets[0].tenant_id,
|
||||
}
|
||||
if verbose:
|
||||
# Include graphs only in verbose mode
|
||||
search_result_dict["graphs"] = graphs
|
||||
|
||||
return_value.append(search_result_dict)
|
||||
|
||||
return return_value
|
||||
else:
|
||||
return_value = []
|
||||
|
|
|
|||
100
cognee/tests/unit/modules/search/test_search.py
Normal file
100
cognee/tests/unit/modules/search/test_search.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
import types
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from cognee.modules.search.types import SearchType
|
||||
|
||||
|
||||
def _make_user(user_id: str = "u1", tenant_id=None):
|
||||
return types.SimpleNamespace(id=user_id, tenant_id=tenant_id)
|
||||
|
||||
|
||||
def _make_dataset(*, name="ds", tenant_id="t1", dataset_id=None, owner_id=None):
|
||||
return types.SimpleNamespace(
|
||||
id=dataset_id or uuid4(),
|
||||
name=name,
|
||||
tenant_id=tenant_id,
|
||||
owner_id=owner_id or uuid4(),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def search_mod():
|
||||
import importlib
|
||||
|
||||
return importlib.import_module("cognee.modules.search.methods.search")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _patch_side_effect_boundaries(monkeypatch, search_mod):
|
||||
"""
|
||||
Keep production logic; patch only unavoidable side-effect boundaries.
|
||||
"""
|
||||
|
||||
async def dummy_log_query(_query_text, _query_type, _user_id):
|
||||
return types.SimpleNamespace(id="qid-1")
|
||||
|
||||
async def dummy_log_result(*_args, **_kwargs):
|
||||
return None
|
||||
|
||||
async def dummy_prepare_search_result(search_result):
|
||||
if isinstance(search_result, tuple) and len(search_result) == 3:
|
||||
result, context, datasets = search_result
|
||||
return {"result": result, "context": context, "graphs": {}, "datasets": datasets}
|
||||
return {"result": None, "context": None, "graphs": {}, "datasets": []}
|
||||
|
||||
monkeypatch.setattr(search_mod, "send_telemetry", lambda *a, **k: None)
|
||||
monkeypatch.setattr(search_mod, "log_query", dummy_log_query)
|
||||
monkeypatch.setattr(search_mod, "log_result", dummy_log_result)
|
||||
monkeypatch.setattr(search_mod, "prepare_search_result", dummy_prepare_search_result)
|
||||
|
||||
yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_access_control_returns_dataset_shaped_dicts(monkeypatch, search_mod):
|
||||
user = _make_user()
|
||||
ds = _make_dataset(name="ds1", tenant_id="t1")
|
||||
|
||||
async def dummy_authorized_search(**kwargs):
|
||||
assert kwargs["dataset_ids"] == [ds.id]
|
||||
return [("r", ["ctx"], [ds])]
|
||||
|
||||
monkeypatch.setattr(search_mod, "backend_access_control_enabled", lambda: True)
|
||||
monkeypatch.setattr(search_mod, "authorized_search", dummy_authorized_search)
|
||||
|
||||
out_non_verbose = await search_mod.search(
|
||||
query_text="q",
|
||||
query_type=SearchType.CHUNKS,
|
||||
dataset_ids=[ds.id],
|
||||
user=user,
|
||||
verbose=False,
|
||||
)
|
||||
|
||||
assert out_non_verbose == [
|
||||
{
|
||||
"search_result": ["r"],
|
||||
"dataset_id": ds.id,
|
||||
"dataset_name": "ds1",
|
||||
"dataset_tenant_id": "t1",
|
||||
}
|
||||
]
|
||||
|
||||
out_verbose = await search_mod.search(
|
||||
query_text="q",
|
||||
query_type=SearchType.CHUNKS,
|
||||
dataset_ids=[ds.id],
|
||||
user=user,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
assert out_verbose == [
|
||||
{
|
||||
"search_result": ["r"],
|
||||
"dataset_id": ds.id,
|
||||
"dataset_name": "ds1",
|
||||
"dataset_tenant_id": "t1",
|
||||
"graphs": {},
|
||||
}
|
||||
]
|
||||
Loading…
Add table
Reference in a new issue