refactor: Unify dataset resolution (#1488)

<!-- .github/pull_request_template.md -->

## Description
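Unify the dataset resolution mechanisms across cognee: `resolve_authorized_user_dataset` now delegates to `resolve_authorized_user_datasets`, and search resolves readable datasets through `get_authorized_existing_datasets`.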

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [x] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-10-07 19:14:46 +02:00 committed by GitHub
parent c2698094c6
commit 7b5bba2b18
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 22 additions and 32 deletions

View file

@ -148,7 +148,9 @@ async def add(
await setup()
user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
user, authorized_dataset = await resolve_authorized_user_dataset(
dataset_name=dataset_name, dataset_id=dataset_id, user=user
)
await reset_dataset_pipeline_run_status(
authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"]

View file

@ -1,47 +1,31 @@
from uuid import UUID
from typing import Optional
from cognee.api.v1.exceptions import DatasetNotFoundError
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_default_user
from cognee.modules.data.methods import (
create_authorized_dataset,
get_authorized_dataset,
get_authorized_dataset_by_name,
from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
resolve_authorized_user_datasets,
)
async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
async def resolve_authorized_user_dataset(
dataset_name: str, dataset_id: Optional[UUID] = None, user: Optional[User] = None
):
"""
Handles dataset creation, or dataset authorization if the dataset already exists in Cognee.
Verifies that the provided user has the necessary permission for the provided dataset.
If the dataset does not exist, creates it and grants permission to the user creating it.
Args:
dataset_id: Id of the dataset.
dataset_name: Name of the dataset.
dataset_id: Id of the dataset.
user: Cognee user the request is being processed for; if None, the default user is used.
Returns:
Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
"""
if not user:
user = await get_default_user()
if dataset_id:
authorized_dataset = await get_authorized_dataset(user, dataset_id, "write")
elif dataset_name:
authorized_dataset = await get_authorized_dataset_by_name(dataset_name, user, "write")
user, authorized_datasets = await resolve_authorized_user_datasets(
datasets=dataset_id if dataset_id else dataset_name, user=user
)
if not authorized_dataset:
authorized_dataset = await create_authorized_dataset(
dataset_name=dataset_name, user=user
)
else:
raise ValueError("Either dataset_id or dataset_name must be provided.")
if not authorized_dataset:
raise DatasetNotFoundError(
message=f"Dataset ({str(dataset_id) or dataset_name}) not found."
)
return user, authorized_dataset
return user, authorized_datasets[0]

View file

@ -1,5 +1,5 @@
from uuid import UUID
from typing import Union, Tuple, List
from typing import Union, Tuple, List, Optional
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User
@ -13,7 +13,7 @@ from cognee.modules.data.methods import (
async def resolve_authorized_user_datasets(
datasets: Union[str, UUID, list[str], list[UUID]], user: User = None
datasets: Union[str, UUID, list[str], list[UUID]], user: Optional[User] = None
) -> Tuple[User, List[Dataset]]:
"""
Handles dataset creation, or dataset authorization if the datasets already exist in Cognee.

View file

@ -19,7 +19,9 @@ from cognee.modules.search.types import (
from cognee.modules.search.operations import log_query, log_result
from cognee.modules.users.models import User
from cognee.modules.data.models import Dataset
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from cognee.modules.data.methods.get_authorized_existing_datasets import (
get_authorized_existing_datasets,
)
from .get_search_type_tools import get_search_type_tools
from .no_access_control_search import no_access_control_search
@ -202,7 +204,9 @@ async def authorized_search(
Not to be used outside of active access control mode.
"""
# Find the datasets the user has read access to (if datasets are provided, return only those the user can read)
search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
search_datasets = await get_authorized_existing_datasets(
datasets=dataset_ids, permission_type="read", user=user
)
if use_combined_context:
search_responses = await search_in_datasets_context(