From 815d6391321c0b31f6e3af07c72103d09b89b376 Mon Sep 17 00:00:00 2001
From: EricXiao <7250816+EricXiao95@users.noreply.github.com>
Date: Sat, 9 Aug 2025 02:42:57 +0800
Subject: [PATCH] fix: graph visualization access for users with read
 permissions (#1220)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->
Description
This PR fix graph visualization access for users with read permissions
(https://github.com/topoteretes/cognee/issues/1182)

- Add permission checks for graph visualization endpoints to ensure
users can only access datasets they have permission to view
- Create get_dataset_with_permissions method to validate user access
before returning a dataset
- Remove redundant dataset existence validation in datasets router and
delegate permission checking to graph data retrieval
- Add comprehensive test suite for graph visualization permissions
covering owner access and permission granting scenarios
- Update get_formatted_graph_data() to use dataset owner's ID for
context
## Testing
Tests can be run with:
```bash
pytest -s cognee/tests/test_graph_visualization_permissions.py
```

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Signed-off-by: EricXiao <taoiaox@gmail.com>
Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
---
 .../datasets/routers/get_datasets_router.py   |   8 +-
 cognee/modules/data/methods/__init__.py       |   1 +
 .../data/methods/get_authorized_dataset.py    |  23 +++
 .../graph/methods/get_formatted_graph_data.py |   8 +-
 .../test_graph_visualization_permissions.py   | 161 ++++++++++++++++++
 5 files changed, 193 insertions(+), 8 deletions(-)
 create mode 100644 cognee/modules/data/methods/get_authorized_dataset.py
 create mode 100644 cognee/tests/test_graph_visualization_permissions.py

diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py
index 4de6feca1..627be226d 100644
--- a/cognee/api/v1/datasets/routers/get_datasets_router.py
+++ b/cognee/api/v1/datasets/routers/get_datasets_router.py
@@ -283,14 +283,8 @@ def get_datasets_router() -> APIRouter:
         - **404 Not Found**: Dataset doesn't exist or user doesn't have access
         - **500 Internal Server Error**: Error retrieving graph data
         """
-        from cognee.modules.data.methods import get_dataset
 
-        dataset = await get_dataset(user.id, dataset_id)
-
-        if dataset is None:
-            raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
-
-        graph_data = await get_formatted_graph_data(dataset.id, user.id)
+        graph_data = await get_formatted_graph_data(dataset_id, user.id)
 
         return graph_data
 
diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py
index 3c62c536d..a17d41683 100644
--- a/cognee/modules/data/methods/__init__.py
+++ b/cognee/modules/data/methods/__init__.py
@@ -6,6 +6,7 @@ from .get_dataset import get_dataset
 from .get_datasets import get_datasets
 from .get_datasets_by_name import get_datasets_by_name
 from .get_dataset_data import get_dataset_data
+from .get_authorized_dataset import get_authorized_dataset
 from .get_data import get_data
 from .get_unique_dataset_id import get_unique_dataset_id
 from .get_authorized_existing_datasets import get_authorized_existing_datasets
diff --git a/cognee/modules/data/methods/get_authorized_dataset.py b/cognee/modules/data/methods/get_authorized_dataset.py
new file mode 100644
index 000000000..ee6115d82
--- /dev/null
+++ b/cognee/modules/data/methods/get_authorized_dataset.py
@@ -0,0 +1,23 @@
+from typing import Optional
+from uuid import UUID
+from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
+from ..models import Dataset
+
+
+async def get_authorized_dataset(
+    user_id: UUID, dataset_id: UUID, permission_type="read"
+) -> Optional[Dataset]:
+    """
+    Get a specific dataset with permissions for a user.
+
+    Args:
+        user_id (UUID): user id
+        dataset_id (UUID): dataset id
+        permission_type (str): permission type(read, write, delete, share), default is read
+
+    Returns:
+        Optional[Dataset]: dataset with permissions
+    """
+    datasets = await get_specific_user_permission_datasets(user_id, permission_type, [dataset_id])
+
+    return datasets[0] if datasets else None
diff --git a/cognee/modules/graph/methods/get_formatted_graph_data.py b/cognee/modules/graph/methods/get_formatted_graph_data.py
index 001c6d15f..d08958a7a 100644
--- a/cognee/modules/graph/methods/get_formatted_graph_data.py
+++ b/cognee/modules/graph/methods/get_formatted_graph_data.py
@@ -1,10 +1,16 @@
 from uuid import UUID
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.context_global_variables import set_database_global_context_variables
+from cognee.modules.data.exceptions.exceptions import DatasetNotFoundError
+from cognee.modules.data.methods import get_authorized_dataset
 
 
 async def get_formatted_graph_data(dataset_id: UUID, user_id: UUID):
-    await set_database_global_context_variables(dataset_id, user_id)
+    dataset = await get_authorized_dataset(user_id, dataset_id)
+    if not dataset:
+        raise DatasetNotFoundError(message="Dataset not found.")
+
+    await set_database_global_context_variables(dataset_id, dataset.owner_id)
 
     graph_client = await get_graph_engine()
     (nodes, edges) = await graph_client.get_graph_data()
diff --git a/cognee/tests/test_graph_visualization_permissions.py b/cognee/tests/test_graph_visualization_permissions.py
new file mode 100644
index 000000000..6c76eeb65
--- /dev/null
+++ b/cognee/tests/test_graph_visualization_permissions.py
@@ -0,0 +1,161 @@
+import asyncio
+import os
+import pathlib
+
+import pytest
+import pytest_asyncio
+from httpx import ASGITransport, AsyncClient
+
+import cognee
+from cognee.api.client import app
+from cognee.modules.users.methods import create_user, get_default_user
+from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
+
+# Use pytest-asyncio to handle all async tests
+pytestmark = pytest.mark.asyncio
+
+
+@pytest.fixture(scope="module")
+def event_loop():
+    """Create an instance of the default event loop for our test module."""
+    policy = asyncio.get_event_loop_policy()
+    loop = policy.new_event_loop()
+    yield loop
+    loop.close()
+
+
+@pytest_asyncio.fixture(scope="module")
+async def client():
+    """Create an async HTTP client for testing"""
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as client:
+        yield client
+
+
+@pytest_asyncio.fixture(scope="module")
+async def setup_environment():
+    """
+    Set up a clean environment for the test, creating necessary users and datasets.
+    This fixture runs once before all tests and cleans up afterwards.
+    """
+    # 1. Enable permissions feature
+    os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"
+
+    # 2. Set up an independent test directory
+    base_dir = pathlib.Path(__file__).parent
+    cognee.config.data_root_directory(str(base_dir / ".data_storage/test_graph_viz"))
+    cognee.config.system_root_directory(str(base_dir / ".cognee_system/test_graph_viz"))
+
+    # 3. Clean up old data
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    # 4. Add document for default user
+    explanation_file_path = os.path.join(base_dir, "test_data/Natural_language_processing.txt")
+    await cognee.add([explanation_file_path], dataset_name="NLP")
+    default_user = await get_default_user()
+    nlp_cognify_result = await cognee.cognify(["NLP"], user=default_user)
+
+    def extract_dataset_id_from_cognify(cognify_result):
+        """Extract dataset_id from cognify output dictionary"""
+        for dataset_id, pipeline_result in cognify_result.items():
+            return dataset_id
+        return None
+
+    dataset_id = extract_dataset_id_from_cognify(nlp_cognify_result)
+
+    yield dataset_id
+
+    # 5. Clean up data after tests are finished
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+
+async def get_authentication_headers(client: AsyncClient, email: str, password: str) -> dict:
+    """Authenticates and returns the Authorization header."""
+    login_data = {"username": email, "password": password}
+    response = await client.post("/api/v1/auth/login", data=login_data, timeout=15)
+
+    assert response.status_code == 200, "Failed to log in and get token"
+
+    token_data = response.json()
+    access_token = token_data["access_token"]
+
+    return {"Authorization": f"Bearer {access_token}"}
+
+
+async def test_owner_can_access_graph(client: AsyncClient, setup_environment: int):
+    """
+    Test Case 1: The dataset owner should be able to access the graph data successfully.
+    """
+    dataset_id = setup_environment
+    default_user_email = "default_user@example.com"
+    default_user_password = "default_password"
+
+    response = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, default_user_email, default_user_password),
+    )
+    assert response.status_code == 200, (
+        f"Owner failed to get the knowledge graph visualization. Response: {response.json()}"
+    )
+    data = response.json()
+    assert len(data) > 1, "The graph data is not valid."
+
+    print("✅ Owner can access the graph visualization successfully.")
+
+
+async def test_granting_permission_enables_access(client: AsyncClient, setup_environment: int):
+    """
+    Test Case 2: A user without any permissions should be denied access (404 Not Found).
+    After granting permission, the user should be able to access the graph data.
+    """
+    dataset_id = setup_environment
+    # Create a user without any permissions to the dataset
+    test_user_email = "test_user@example.com"
+    test_user_password = "test_password"
+    test_user = await create_user(test_user_email, test_user_password)
+
+    # Test the access to graph visualization for the test user without any permissions
+    response = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, test_user_email, test_user_password),
+    )
+    assert response.status_code == 403, (
+        "Access to graph visualization should be denied without READ permission."
+    )
+    assert (
+        response.json()["detail"]
+        == "Request owner does not have necessary permission: [read] for all datasets requested. [PermissionDeniedError]"
+    )
+    print("✅ Access to graph visualization should be denied without READ permission.")
+
+    # Grant permission to the test user
+    default_user = await get_default_user()
+    await authorized_give_permission_on_datasets(
+        test_user.id, [dataset_id], "read", default_user.id
+    )
+
+    # Test the access to graph visualization for the test user
+    response_for_test_user = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, test_user_email, test_user_password),
+    )
+    assert response_for_test_user.status_code == 200, (
+        "Access to graph visualization should succeed for user with been granted read permission"
+    )
+    print(
+        "✅ Access to graph visualization should succeed for user with been granted read permission"
+    )
+
+    # Test the graph data is the same for the test user and the default user
+    default_user_email = "default_user@example.com"
+    default_user_password = "default_password"
+    response_for_default_user = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, default_user_email, default_user_password),
+    )
+    assert response_for_test_user.json() == response_for_default_user.json(), (
+        "The graph data for the test user and the default user is not the same."
+    )
+    print("✅ The graph data for the test user and the default user is the same.")