From 815d6391321c0b31f6e3af07c72103d09b89b376 Mon Sep 17 00:00:00 2001 From: EricXiao <7250816+EricXiao95@users.noreply.github.com> Date: Sat, 9 Aug 2025 02:42:57 +0800 Subject: [PATCH] fix: graph visualization access for users with read permissions (#1220) ## Description Description This PR fix graph visualization access for users with read permissions (https://github.com/topoteretes/cognee/issues/1182) - Add permission checks for graph visualization endpoints to ensure users can only access datasets they have permission to view - Create get_dataset_with_permissions method to validate user access before returning a dataset - Remove redundant dataset existence validation in datasets router and delegate permission checking to graph data retrieval - Add comprehensive test suite for graph visualization permissions covering owner access and permission granting scenarios - Update get_formatted_graph_data() to use dataset owner's ID for context ## Testing Tests can be run with: ```bash pytest -s cognee/tests/test_graph_visualization_permissions.py ``` ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --------- Signed-off-by: EricXiao Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com> --- .../datasets/routers/get_datasets_router.py | 8 +- cognee/modules/data/methods/__init__.py | 1 + .../data/methods/get_authorized_dataset.py | 23 +++ .../graph/methods/get_formatted_graph_data.py | 8 +- .../test_graph_visualization_permissions.py | 161 ++++++++++++++++++ 5 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 cognee/modules/data/methods/get_authorized_dataset.py create mode 100644 cognee/tests/test_graph_visualization_permissions.py diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index 4de6feca1..627be226d 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -283,14 +283,8 @@ def get_datasets_router() -> APIRouter: - **404 Not Found**: Dataset doesn't exist or user doesn't have access - **500 Internal Server Error**: Error retrieving graph data """ - from cognee.modules.data.methods import get_dataset - dataset = await get_dataset(user.id, dataset_id) - - if dataset is None: - raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.") - - graph_data = await get_formatted_graph_data(dataset.id, user.id) + graph_data = await get_formatted_graph_data(dataset_id, user.id) return graph_data diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py index 3c62c536d..a17d41683 100644 --- a/cognee/modules/data/methods/__init__.py +++ b/cognee/modules/data/methods/__init__.py @@ -6,6 +6,7 @@ from .get_dataset import get_dataset from .get_datasets import get_datasets from .get_datasets_by_name import get_datasets_by_name from .get_dataset_data import get_dataset_data +from .get_authorized_dataset import get_authorized_dataset from .get_data import get_data from .get_unique_dataset_id import get_unique_dataset_id from .get_authorized_existing_datasets import get_authorized_existing_datasets diff --git a/cognee/modules/data/methods/get_authorized_dataset.py b/cognee/modules/data/methods/get_authorized_dataset.py new file mode 100644 index 000000000..ee6115d82 --- /dev/null +++ b/cognee/modules/data/methods/get_authorized_dataset.py @@ -0,0 +1,23 @@ +from typing import Optional +from uuid import UUID +from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets +from ..models import Dataset + + +async def get_authorized_dataset( + user_id: UUID, dataset_id: UUID, permission_type="read" +) -> Optional[Dataset]: + """ + Get a specific dataset with permissions for a user. + + Args: + user_id (UUID): user id + dataset_id (UUID): dataset id + permission_type (str): permission type(read, write, delete, share), default is read + + Returns: + Optional[Dataset]: dataset with permissions + """ + datasets = await get_specific_user_permission_datasets(user_id, permission_type, [dataset_id]) + + return datasets[0] if datasets else None diff --git a/cognee/modules/graph/methods/get_formatted_graph_data.py b/cognee/modules/graph/methods/get_formatted_graph_data.py index 001c6d15f..d08958a7a 100644 --- a/cognee/modules/graph/methods/get_formatted_graph_data.py +++ b/cognee/modules/graph/methods/get_formatted_graph_data.py @@ -1,10 +1,16 @@ from uuid import UUID from cognee.infrastructure.databases.graph import get_graph_engine from cognee.context_global_variables import set_database_global_context_variables +from cognee.modules.data.exceptions.exceptions import DatasetNotFoundError +from cognee.modules.data.methods import get_authorized_dataset async def get_formatted_graph_data(dataset_id: UUID, user_id: UUID): - await set_database_global_context_variables(dataset_id, user_id) + dataset = await get_authorized_dataset(user_id, dataset_id) + if not dataset: + raise DatasetNotFoundError(message="Dataset not found.") + + await set_database_global_context_variables(dataset_id, dataset.owner_id) graph_client = await get_graph_engine() (nodes, edges) = await graph_client.get_graph_data() diff --git a/cognee/tests/test_graph_visualization_permissions.py b/cognee/tests/test_graph_visualization_permissions.py new file mode 100644 index 000000000..6c76eeb65 --- /dev/null +++ b/cognee/tests/test_graph_visualization_permissions.py @@ -0,0 +1,161 @@ +import asyncio +import os +import pathlib + +import pytest +import pytest_asyncio +from httpx import ASGITransport, AsyncClient + +import cognee +from cognee.api.client import app +from cognee.modules.users.methods import create_user, get_default_user +from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets + +# Use pytest-asyncio to handle all async tests +pytestmark = pytest.mark.asyncio + + +@pytest.fixture(scope="module") +def event_loop(): + """Create an instance of the default event loop for our test module.""" + policy = asyncio.get_event_loop_policy() + loop = policy.new_event_loop() + yield loop + loop.close() + + +@pytest_asyncio.fixture(scope="module") +async def client(): + """Create an async HTTP client for testing""" + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + +@pytest_asyncio.fixture(scope="module") +async def setup_environment(): + """ + Set up a clean environment for the test, creating necessary users and datasets. + This fixture runs once before all tests and cleans up afterwards. + """ + # 1. Enable permissions feature + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True" + + # 2. Set up an independent test directory + base_dir = pathlib.Path(__file__).parent + cognee.config.data_root_directory(str(base_dir / ".data_storage/test_graph_viz")) + cognee.config.system_root_directory(str(base_dir / ".cognee_system/test_graph_viz")) + + # 3. Clean up old data + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # 4. Add document for default user + explanation_file_path = os.path.join(base_dir, "test_data/Natural_language_processing.txt") + await cognee.add([explanation_file_path], dataset_name="NLP") + default_user = await get_default_user() + nlp_cognify_result = await cognee.cognify(["NLP"], user=default_user) + + def extract_dataset_id_from_cognify(cognify_result): + """Extract dataset_id from cognify output dictionary""" + for dataset_id, pipeline_result in cognify_result.items(): + return dataset_id + return None + + dataset_id = extract_dataset_id_from_cognify(nlp_cognify_result) + + yield dataset_id + + # 5. Clean up data after tests are finished + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + +async def get_authentication_headers(client: AsyncClient, email: str, password: str) -> dict: + """Authenticates and returns the Authorization header.""" + login_data = {"username": email, "password": password} + response = await client.post("/api/v1/auth/login", data=login_data, timeout=15) + + assert response.status_code == 200, "Failed to log in and get token" + + token_data = response.json() + access_token = token_data["access_token"] + + return {"Authorization": f"Bearer {access_token}"} + + +async def test_owner_can_access_graph(client: AsyncClient, setup_environment: int): + """ + Test Case 1: The dataset owner should be able to access the graph data successfully. + """ + dataset_id = setup_environment + default_user_email = "default_user@example.com" + default_user_password = "default_password" + + response = await client.get( + f"/api/v1/datasets/{dataset_id}/graph", + headers=await get_authentication_headers(client, default_user_email, default_user_password), + ) + assert response.status_code == 200, ( + f"Owner failed to get the knowledge graph visualization. Response: {response.json()}" + ) + data = response.json() + assert len(data) > 1, "The graph data is not valid." + + print("✅ Owner can access the graph visualization successfully.") + + +async def test_granting_permission_enables_access(client: AsyncClient, setup_environment: int): + """ + Test Case 2: A user without any permissions should be denied access (404 Not Found). + After granting permission, the user should be able to access the graph data. + """ + dataset_id = setup_environment + # Create a user without any permissions to the dataset + test_user_email = "test_user@example.com" + test_user_password = "test_password" + test_user = await create_user(test_user_email, test_user_password) + + # Test the access to graph visualization for the test user without any permissions + response = await client.get( + f"/api/v1/datasets/{dataset_id}/graph", + headers=await get_authentication_headers(client, test_user_email, test_user_password), + ) + assert response.status_code == 403, ( + "Access to graph visualization should be denied without READ permission." + ) + assert ( + response.json()["detail"] + == "Request owner does not have necessary permission: [read] for all datasets requested. [PermissionDeniedError]" + ) + print("✅ Access to graph visualization should be denied without READ permission.") + + # Grant permission to the test user + default_user = await get_default_user() + await authorized_give_permission_on_datasets( + test_user.id, [dataset_id], "read", default_user.id + ) + + # Test the access to graph visualization for the test user + response_for_test_user = await client.get( + f"/api/v1/datasets/{dataset_id}/graph", + headers=await get_authentication_headers(client, test_user_email, test_user_password), + ) + assert response_for_test_user.status_code == 200, ( + "Access to graph visualization should succeed for user with been granted read permission" + ) + print( + "✅ Access to graph visualization should succeed for user with been granted read permission" + ) + + # Test the graph data is the same for the test user and the default user + default_user_email = "default_user@example.com" + default_user_password = "default_password" + response_for_default_user = await client.get( + f"/api/v1/datasets/{dataset_id}/graph", + headers=await get_authentication_headers(client, default_user_email, default_user_password), + ) + assert response_for_test_user.json() == response_for_default_user.json(), ( + "The graph data for the test user and the default user is not the same." + ) + print("✅ The graph data for the test user and the default user is the same.")