Merge branch 'main' into agents-integrations

Tarun Jain 2025-12-18 15:16:42 +01:00 committed by GitHub
commit 92e9ab319c
3 changed files with 129 additions and 18 deletions

View file

@@ -33,6 +33,7 @@ async def search(
     session_id: Optional[str] = None,
     wide_search_top_k: Optional[int] = 100,
     triplet_distance_penalty: Optional[float] = 3.5,
+    verbose: bool = False,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -123,6 +124,8 @@ async def search(
         session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
+        verbose: If True, returns detailed result information including graph representation (when possible).
+
     Returns:
         list: Search results in format determined by query_type:
@@ -204,6 +207,7 @@ async def search(
         session_id=session_id,
         wide_search_top_k=wide_search_top_k,
         triplet_distance_penalty=triplet_distance_penalty,
+        verbose=verbose,
     )
     return filtered_search_results
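
For orientation, a minimal usage sketch of the new flag (a sketch, assuming the public cognee.search entrypoint forwards to this function and a knowledge graph has already been built; names and queries here are illustrative):

import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    # Default (verbose=False): compact result dicts without the "graphs" payload.
    hits = await cognee.search(query_text="q", query_type=SearchType.CHUNKS)

    # verbose=True: each result dict additionally carries a "graphs" entry.
    detailed = await cognee.search(
        query_text="q",
        query_type=SearchType.CHUNKS,
        verbose=True,
    )
    print(hits, detailed)


asyncio.run(main())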

View file

@@ -49,6 +49,7 @@ async def search(
     session_id: Optional[str] = None,
     wide_search_top_k: Optional[int] = 100,
     triplet_distance_penalty: Optional[float] = 3.5,
+    verbose: bool = False,
 ) -> Union[CombinedSearchResult, List[SearchResult]]:
     """
@@ -140,6 +141,7 @@ async def search(
     )
     if use_combined_context:
+        # Note: combined context search must always be verbose and return a CombinedSearchResult with graphs info
         prepared_search_results = await prepare_search_result(
             search_results[0] if isinstance(search_results, list) else search_results
         )
@@ -173,25 +175,30 @@ async def search(
         datasets = prepared_search_results["datasets"]
         if only_context:
-            return_value.append(
-                {
-                    "search_result": [context] if context else None,
-                    "dataset_id": datasets[0].id,
-                    "dataset_name": datasets[0].name,
-                    "dataset_tenant_id": datasets[0].tenant_id,
-                    "graphs": graphs,
-                }
-            )
+            search_result_dict = {
+                "search_result": [context] if context else None,
+                "dataset_id": datasets[0].id,
+                "dataset_name": datasets[0].name,
+                "dataset_tenant_id": datasets[0].tenant_id,
+            }
+            if verbose:
+                # Include graphs only in verbose mode
+                search_result_dict["graphs"] = graphs
+            return_value.append(search_result_dict)
         else:
-            return_value.append(
-                {
-                    "search_result": [result] if result else None,
-                    "dataset_id": datasets[0].id,
-                    "dataset_name": datasets[0].name,
-                    "dataset_tenant_id": datasets[0].tenant_id,
-                    "graphs": graphs,
-                }
-            )
+            search_result_dict = {
+                "search_result": [result] if result else None,
+                "dataset_id": datasets[0].id,
+                "dataset_name": datasets[0].name,
+                "dataset_tenant_id": datasets[0].tenant_id,
+            }
+            if verbose:
+                # Include graphs only in verbose mode
+                search_result_dict["graphs"] = graphs
+            return_value.append(search_result_dict)
         return return_value
     else:
         return_value = []
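
The behavioral contract of the gate, spelled out as data (illustrative values; the new test file below asserts exactly this shape):

# The same hit with and without verbose (illustrative values):
non_verbose_hit = {
    "search_result": ["r"],
    "dataset_id": "<dataset UUID>",
    "dataset_name": "ds1",
    "dataset_tenant_id": "t1",
}
# verbose=True only adds the "graphs" key; everything else is unchanged.
verbose_hit = {**non_verbose_hit, "graphs": {}}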

View file

@@ -0,0 +1,100 @@
import types
from uuid import uuid4

import pytest

from cognee.modules.search.types import SearchType


def _make_user(user_id: str = "u1", tenant_id=None):
    return types.SimpleNamespace(id=user_id, tenant_id=tenant_id)


def _make_dataset(*, name="ds", tenant_id="t1", dataset_id=None, owner_id=None):
    return types.SimpleNamespace(
        id=dataset_id or uuid4(),
        name=name,
        tenant_id=tenant_id,
        owner_id=owner_id or uuid4(),
    )


@pytest.fixture
def search_mod():
    import importlib

    return importlib.import_module("cognee.modules.search.methods.search")


@pytest.fixture(autouse=True)
def _patch_side_effect_boundaries(monkeypatch, search_mod):
    """
    Keep production logic; patch only unavoidable side-effect boundaries.
    """

    async def dummy_log_query(_query_text, _query_type, _user_id):
        return types.SimpleNamespace(id="qid-1")

    async def dummy_log_result(*_args, **_kwargs):
        return None

    async def dummy_prepare_search_result(search_result):
        if isinstance(search_result, tuple) and len(search_result) == 3:
            result, context, datasets = search_result
            return {"result": result, "context": context, "graphs": {}, "datasets": datasets}
        return {"result": None, "context": None, "graphs": {}, "datasets": []}

    monkeypatch.setattr(search_mod, "send_telemetry", lambda *a, **k: None)
    monkeypatch.setattr(search_mod, "log_query", dummy_log_query)
    monkeypatch.setattr(search_mod, "log_result", dummy_log_result)
    monkeypatch.setattr(search_mod, "prepare_search_result", dummy_prepare_search_result)
    yield


@pytest.mark.asyncio
async def test_search_access_control_returns_dataset_shaped_dicts(monkeypatch, search_mod):
    user = _make_user()
    ds = _make_dataset(name="ds1", tenant_id="t1")

    async def dummy_authorized_search(**kwargs):
        assert kwargs["dataset_ids"] == [ds.id]
        return [("r", ["ctx"], [ds])]

    monkeypatch.setattr(search_mod, "backend_access_control_enabled", lambda: True)
    monkeypatch.setattr(search_mod, "authorized_search", dummy_authorized_search)

    # Without verbose, the result dicts must not carry a "graphs" key.
    out_non_verbose = await search_mod.search(
        query_text="q",
        query_type=SearchType.CHUNKS,
        dataset_ids=[ds.id],
        user=user,
        verbose=False,
    )
    assert out_non_verbose == [
        {
            "search_result": ["r"],
            "dataset_id": ds.id,
            "dataset_name": "ds1",
            "dataset_tenant_id": "t1",
        }
    ]

    # With verbose, the same dict additionally includes the graphs payload.
    out_verbose = await search_mod.search(
        query_text="q",
        query_type=SearchType.CHUNKS,
        dataset_ids=[ds.id],
        user=user,
        verbose=True,
    )
    assert out_verbose == [
        {
            "search_result": ["r"],
            "dataset_id": ds.id,
            "dataset_name": "ds1",
            "dataset_tenant_id": "t1",
            "graphs": {},
        }
    ]
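
Note: the @pytest.mark.asyncio marker requires the pytest-asyncio plugin; without it installed (or an equivalent asyncio_mode configuration), the async test will not run.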