fix: graph visualization access for users with read permissions (#1220)
<!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> Description This PR fix graph visualization access for users with read permissions (https://github.com/topoteretes/cognee/issues/1182) - Add permission checks for graph visualization endpoints to ensure users can only access datasets they have permission to view - Create get_dataset_with_permissions method to validate user access before returning a dataset - Remove redundant dataset existence validation in datasets router and delegate permission checking to graph data retrieval - Add comprehensive test suite for graph visualization permissions covering owner access and permission granting scenarios - Update get_formatted_graph_data() to use dataset owner's ID for context ## Testing Tests can be run with: ```bash pytest -s cognee/tests/test_graph_visualization_permissions.py ``` ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --------- Signed-off-by: EricXiao <taoiaox@gmail.com> Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
This commit is contained in:
parent
e3b41e0ed4
commit
815d639132
5 changed files with 193 additions and 8 deletions
|
|
@ -283,14 +283,8 @@ def get_datasets_router() -> APIRouter:
|
|||
- **404 Not Found**: Dataset doesn't exist or user doesn't have access
|
||||
- **500 Internal Server Error**: Error retrieving graph data
|
||||
"""
|
||||
from cognee.modules.data.methods import get_dataset
|
||||
|
||||
dataset = await get_dataset(user.id, dataset_id)
|
||||
|
||||
if dataset is None:
|
||||
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
|
||||
|
||||
graph_data = await get_formatted_graph_data(dataset.id, user.id)
|
||||
graph_data = await get_formatted_graph_data(dataset_id, user.id)
|
||||
|
||||
return graph_data
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from .get_dataset import get_dataset
|
|||
from .get_datasets import get_datasets
|
||||
from .get_datasets_by_name import get_datasets_by_name
|
||||
from .get_dataset_data import get_dataset_data
|
||||
from .get_authorized_dataset import get_authorized_dataset
|
||||
from .get_data import get_data
|
||||
from .get_unique_dataset_id import get_unique_dataset_id
|
||||
from .get_authorized_existing_datasets import get_authorized_existing_datasets
|
||||
|
|
|
|||
23
cognee/modules/data/methods/get_authorized_dataset.py
Normal file
23
cognee/modules/data/methods/get_authorized_dataset.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
from typing import Optional
|
||||
from uuid import UUID
|
||||
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
|
||||
from ..models import Dataset
|
||||
|
||||
|
||||
async def get_authorized_dataset(
|
||||
user_id: UUID, dataset_id: UUID, permission_type="read"
|
||||
) -> Optional[Dataset]:
|
||||
"""
|
||||
Get a specific dataset with permissions for a user.
|
||||
|
||||
Args:
|
||||
user_id (UUID): user id
|
||||
dataset_id (UUID): dataset id
|
||||
permission_type (str): permission type(read, write, delete, share), default is read
|
||||
|
||||
Returns:
|
||||
Optional[Dataset]: dataset with permissions
|
||||
"""
|
||||
datasets = await get_specific_user_permission_datasets(user_id, permission_type, [dataset_id])
|
||||
|
||||
return datasets[0] if datasets else None
|
||||
|
|
@ -1,10 +1,16 @@
|
|||
from uuid import UUID
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.context_global_variables import set_database_global_context_variables
|
||||
from cognee.modules.data.exceptions.exceptions import DatasetNotFoundError
|
||||
from cognee.modules.data.methods import get_authorized_dataset
|
||||
|
||||
|
||||
async def get_formatted_graph_data(dataset_id: UUID, user_id: UUID):
|
||||
await set_database_global_context_variables(dataset_id, user_id)
|
||||
dataset = await get_authorized_dataset(user_id, dataset_id)
|
||||
if not dataset:
|
||||
raise DatasetNotFoundError(message="Dataset not found.")
|
||||
|
||||
await set_database_global_context_variables(dataset_id, dataset.owner_id)
|
||||
|
||||
graph_client = await get_graph_engine()
|
||||
(nodes, edges) = await graph_client.get_graph_data()
|
||||
|
|
|
|||
161
cognee/tests/test_graph_visualization_permissions.py
Normal file
161
cognee/tests/test_graph_visualization_permissions.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
import asyncio
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
import cognee
|
||||
from cognee.api.client import app
|
||||
from cognee.modules.users.methods import create_user, get_default_user
|
||||
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
|
||||
|
||||
# Use pytest-asyncio to handle all async tests
|
||||
pytestmark = pytest.mark.asyncio
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def event_loop():
|
||||
"""Create an instance of the default event loop for our test module."""
|
||||
policy = asyncio.get_event_loop_policy()
|
||||
loop = policy.new_event_loop()
|
||||
yield loop
|
||||
loop.close()
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="module")
|
||||
async def client():
|
||||
"""Create an async HTTP client for testing"""
|
||||
transport = ASGITransport(app=app)
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="module")
|
||||
async def setup_environment():
|
||||
"""
|
||||
Set up a clean environment for the test, creating necessary users and datasets.
|
||||
This fixture runs once before all tests and cleans up afterwards.
|
||||
"""
|
||||
# 1. Enable permissions feature
|
||||
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"
|
||||
|
||||
# 2. Set up an independent test directory
|
||||
base_dir = pathlib.Path(__file__).parent
|
||||
cognee.config.data_root_directory(str(base_dir / ".data_storage/test_graph_viz"))
|
||||
cognee.config.system_root_directory(str(base_dir / ".cognee_system/test_graph_viz"))
|
||||
|
||||
# 3. Clean up old data
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
# 4. Add document for default user
|
||||
explanation_file_path = os.path.join(base_dir, "test_data/Natural_language_processing.txt")
|
||||
await cognee.add([explanation_file_path], dataset_name="NLP")
|
||||
default_user = await get_default_user()
|
||||
nlp_cognify_result = await cognee.cognify(["NLP"], user=default_user)
|
||||
|
||||
def extract_dataset_id_from_cognify(cognify_result):
|
||||
"""Extract dataset_id from cognify output dictionary"""
|
||||
for dataset_id, pipeline_result in cognify_result.items():
|
||||
return dataset_id
|
||||
return None
|
||||
|
||||
dataset_id = extract_dataset_id_from_cognify(nlp_cognify_result)
|
||||
|
||||
yield dataset_id
|
||||
|
||||
# 5. Clean up data after tests are finished
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
|
||||
async def get_authentication_headers(client: AsyncClient, email: str, password: str) -> dict:
|
||||
"""Authenticates and returns the Authorization header."""
|
||||
login_data = {"username": email, "password": password}
|
||||
response = await client.post("/api/v1/auth/login", data=login_data, timeout=15)
|
||||
|
||||
assert response.status_code == 200, "Failed to log in and get token"
|
||||
|
||||
token_data = response.json()
|
||||
access_token = token_data["access_token"]
|
||||
|
||||
return {"Authorization": f"Bearer {access_token}"}
|
||||
|
||||
|
||||
async def test_owner_can_access_graph(client: AsyncClient, setup_environment: int):
|
||||
"""
|
||||
Test Case 1: The dataset owner should be able to access the graph data successfully.
|
||||
"""
|
||||
dataset_id = setup_environment
|
||||
default_user_email = "default_user@example.com"
|
||||
default_user_password = "default_password"
|
||||
|
||||
response = await client.get(
|
||||
f"/api/v1/datasets/{dataset_id}/graph",
|
||||
headers=await get_authentication_headers(client, default_user_email, default_user_password),
|
||||
)
|
||||
assert response.status_code == 200, (
|
||||
f"Owner failed to get the knowledge graph visualization. Response: {response.json()}"
|
||||
)
|
||||
data = response.json()
|
||||
assert len(data) > 1, "The graph data is not valid."
|
||||
|
||||
print("✅ Owner can access the graph visualization successfully.")
|
||||
|
||||
|
||||
async def test_granting_permission_enables_access(client: AsyncClient, setup_environment: int):
|
||||
"""
|
||||
Test Case 2: A user without any permissions should be denied access (404 Not Found).
|
||||
After granting permission, the user should be able to access the graph data.
|
||||
"""
|
||||
dataset_id = setup_environment
|
||||
# Create a user without any permissions to the dataset
|
||||
test_user_email = "test_user@example.com"
|
||||
test_user_password = "test_password"
|
||||
test_user = await create_user(test_user_email, test_user_password)
|
||||
|
||||
# Test the access to graph visualization for the test user without any permissions
|
||||
response = await client.get(
|
||||
f"/api/v1/datasets/{dataset_id}/graph",
|
||||
headers=await get_authentication_headers(client, test_user_email, test_user_password),
|
||||
)
|
||||
assert response.status_code == 403, (
|
||||
"Access to graph visualization should be denied without READ permission."
|
||||
)
|
||||
assert (
|
||||
response.json()["detail"]
|
||||
== "Request owner does not have necessary permission: [read] for all datasets requested. [PermissionDeniedError]"
|
||||
)
|
||||
print("✅ Access to graph visualization should be denied without READ permission.")
|
||||
|
||||
# Grant permission to the test user
|
||||
default_user = await get_default_user()
|
||||
await authorized_give_permission_on_datasets(
|
||||
test_user.id, [dataset_id], "read", default_user.id
|
||||
)
|
||||
|
||||
# Test the access to graph visualization for the test user
|
||||
response_for_test_user = await client.get(
|
||||
f"/api/v1/datasets/{dataset_id}/graph",
|
||||
headers=await get_authentication_headers(client, test_user_email, test_user_password),
|
||||
)
|
||||
assert response_for_test_user.status_code == 200, (
|
||||
"Access to graph visualization should succeed for user with been granted read permission"
|
||||
)
|
||||
print(
|
||||
"✅ Access to graph visualization should succeed for user with been granted read permission"
|
||||
)
|
||||
|
||||
# Test the graph data is the same for the test user and the default user
|
||||
default_user_email = "default_user@example.com"
|
||||
default_user_password = "default_password"
|
||||
response_for_default_user = await client.get(
|
||||
f"/api/v1/datasets/{dataset_id}/graph",
|
||||
headers=await get_authentication_headers(client, default_user_email, default_user_password),
|
||||
)
|
||||
assert response_for_test_user.json() == response_for_default_user.json(), (
|
||||
"The graph data for the test user and the default user is not the same."
|
||||
)
|
||||
print("✅ The graph data for the test user and the default user is the same.")
|
||||
Loading…
Add table
Reference in a new issue