Removed check_permissions_on_dataset.py and related references (#1786)

## Description

This PR removes the obsolete `check_permissions_on_dataset` task and all its related imports and usages across the codebase. The authorization logic is now handled earlier in the pipeline, so this task is no longer needed. These changes simplify the default Cognify pipeline and make the code cleaner and easier to maintain.

### Changes Made

- Removed `cognee/tasks/documents/check_permissions_on_dataset.py`
- Removed import from `cognee/tasks/documents/__init__.py`
- Removed import and usage in `cognee/api/v1/cognify/cognify.py`
- Removed import and usage in `cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py`
- Updated comments in `cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py` (index positions changed)
- Removed usage in `notebooks/cognee_demo.ipynb`
- Updated documentation in `examples/python/simple_example.py` (process description)

---

## Type of Change

- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [x] Code refactoring
- [x] Other (please specify): Task removal / cleanup of deprecated function

---

## Pre-submission Checklist

- [ ] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the issue**
- [x] My code follows the project's coding standards and style guidelines
- [ ] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been submitted already
- [x] I have linked any relevant issues in the description (Closes #1771)
- [x] My commits have clear and descriptive messages

---

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
2025-12-08 05:43:42 +01:00 · 2025-12-08 05:43:42 +01:00 · 75fea8dcc8
commit 75fea8dcc8
parent 00b60aed6c 3acb581bd0
7 changed files with 19 additions and 58 deletions
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -19,7 +19,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
 from cognee.modules.users.models import User

 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
    classify_documents,
    extract_chunks_from_documents,
 )
@@ -78,12 +77,11 @@ async def cognify(

    Processing Pipeline:
        1. **Document Classification**: Identifies document types and structures
-        2. **Permission Validation**: Ensures user has processing rights
-        3. **Text Chunking**: Breaks content into semantically meaningful segments
-        4. **Entity Extraction**: Identifies key concepts, people, places, organizations
-        5. **Relationship Detection**: Discovers connections between entities
-        6. **Graph Construction**: Builds semantic knowledge graph with embeddings
-        7. **Content Summarization**: Creates hierarchical summaries for navigation
+        2. **Text Chunking**: Breaks content into semantically meaningful segments
+        3. **Entity Extraction**: Identifies key concepts, people, places, organizations
+        4. **Relationship Detection**: Discovers connections between entities
+        5. **Graph Construction**: Builds semantic knowledge graph with embeddings
+        6. **Content Summarization**: Creates hierarchical summaries for navigation

    Graph Model Customization:
        The `graph_model` parameter allows custom knowledge structures:
@@ -274,7 +272,6 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's

    default_tasks = [
        Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
        Task(
            extract_chunks_from_documents,
            max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -305,14 +302,13 @@ async def get_temporal_tasks(

    The pipeline includes:
    1. Document classification.
-    2. Dataset permission checks (requires "write" access).
-    3. Document chunking with a specified or default chunk size.
-    4. Event and timestamp extraction from chunks.
-    5. Knowledge graph extraction from events.
-    6. Batched insertion of data points.
+    2. Document chunking with a specified or default chunk size.
+    3. Event and timestamp extraction from chunks.
+    4. Knowledge graph extraction from events.
+    5. Batched insertion of data points.

    Args:
-        user (User, optional): The user requesting task execution, used for permission checks.
+        user (User, optional): The user requesting task execution.
        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
        chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +321,6 @@ async def get_temporal_tasks(

    temporal_tasks = [
        Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
        Task(
            extract_chunks_from_documents,
            max_chunk_size=chunk_size or get_max_chunk_tokens(),
--- a/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
@@ -8,7 +8,6 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
    classify_documents,
    extract_chunks_from_documents,
 )
@@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
        cognee_config = get_cognify_config()
        default_tasks = [
            Task(classify_documents),
-            Task(check_permissions_on_dataset, user=user, permissions=["write"]),
            Task(
                extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
            ),  # Extract text chunks based on the document type.
--- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
    ontology_file_path=None,
 ) -> List[Task]:
    """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)

    ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)

@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
    chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
    """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)

    add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})

--- a/cognee/tasks/documents/__init__.py
+++ b/cognee/tasks/documents/__init__.py
@@ -1,3 +1,2 @@
 from .classify_documents import classify_documents
 from .extract_chunks_from_documents import extract_chunks_from_documents
-from .check_permissions_on_dataset import check_permissions_on_dataset
--- a/cognee/tasks/documents/check_permissions_on_dataset.py
+++ b/cognee/tasks/documents/check_permissions_on_dataset.py
@@ -1,26 +0,0 @@
-from cognee.modules.data.processing.document_types import Document
-from cognee.modules.users.permissions.methods import check_permission_on_dataset
-from typing import List
-
-
-async def check_permissions_on_dataset(
-    documents: List[Document], context: dict, user, permissions
-) -> List[Document]:
-    """
-    Validates a user's permissions on a list of documents.
-
-    Notes:
-        - This function assumes that `check_permission_on_documents` raises an exception if the permission check fails.
-        - It is designed to validate multiple permissions in a sequential manner for the same set of documents.
-        - Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
-    """
-
-    for permission in permissions:
-        await check_permission_on_dataset(
-            user,
-            permission,
-            # TODO: pass dataset through argument instead of context
-            context["dataset"].id,
-        )
-
-    return documents
--- a/examples/python/simple_example.py
+++ b/examples/python/simple_example.py
@@ -32,16 +32,13 @@ async def main():
    print("Cognify process steps:")
    print("1. Classifying the document: Determining the type and category of the input text.")
    print(
-        "2. Checking permissions: Ensuring the user has the necessary rights to process the text."
+        "2. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
    )
    print(
-        "3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
+        "3. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
    )
-    print("4. Adding data points: Storing the extracted chunks for processing.")
-    print(
-        "5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
-    )
-    print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n")
+    print("4. Summarizing text: Creating concise summaries of the content for quick insights.")
+    print("5. Adding data points: Storing the extracted chunks for processing.\n")

    # Use LLMs and cognee to create knowledge graph
    await cognee.cognify()
--- a/notebooks/cognee_demo.ipynb
+++ b/notebooks/cognee_demo.ipynb
@@ -591,7 +591,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
   "id": "7c431fdef4921ae0",
   "metadata": {
    "ExecuteTime": {
@@ -609,7 +609,6 @@
    "from cognee.modules.pipelines import run_tasks\n",
    "from cognee.modules.users.models import User\n",
    "from cognee.tasks.documents import (\n",
-    "    check_permissions_on_dataset,\n",
    "    classify_documents,\n",
    "    extract_chunks_from_documents,\n",
    ")\n",
@@ -627,7 +626,6 @@
    "\n",
    "        tasks = [\n",
    "            Task(classify_documents),\n",
-    "            Task(check_permissions_on_dataset, user=user, permissions=[\"write\"]),\n",
    "            Task(\n",
    "                extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()\n",
    "            ),  # Extract text chunks based on the document type.\n",