Removed check_permissions_on_dataset.py and related references

martin0731 2025-11-13 08:31:15 -05:00
parent 487635b71b
commit 3acb581bd0
7 changed files with 19 additions and 58 deletions

View file

@@ -19,7 +19,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
from cognee.modules.users.models import User
from cognee.tasks.documents import (
check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@@ -78,12 +77,11 @@ async def cognify(
Processing Pipeline:
1. **Document Classification**: Identifies document types and structures
2. **Permission Validation**: Ensures user has processing rights
3. **Text Chunking**: Breaks content into semantically meaningful segments
4. **Entity Extraction**: Identifies key concepts, people, places, organizations
5. **Relationship Detection**: Discovers connections between entities
6. **Graph Construction**: Builds semantic knowledge graph with embeddings
7. **Content Summarization**: Creates hierarchical summaries for navigation
2. **Text Chunking**: Breaks content into semantically meaningful segments
3. **Entity Extraction**: Identifies key concepts, people, places, organizations
4. **Relationship Detection**: Discovers connections between entities
5. **Graph Construction**: Builds semantic knowledge graph with embeddings
6. **Content Summarization**: Creates hierarchical summaries for navigation
Graph Model Customization:
The `graph_model` parameter allows custom knowledge structures:
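The "Graph Model Customization" note above is cut off by the hunk, but a minimal sketch of what it describes may still help. Everything in it is an assumption for illustration: the `Acquisition` model and the sample text are invented, and it presumes `cognee.cognify` accepts a Pydantic-style model via `graph_model`, in the same way the default `KnowledgeGraph` model (imported elsewhere in this commit) is used.

import asyncio

from pydantic import BaseModel

import cognee


class Acquisition(BaseModel):
    # Hypothetical custom graph model; the field names are illustrative only.
    acquirer: str
    target: str
    year: int


async def main():
    await cognee.add("ExampleCorp acquired SampleSoft in 2021.")
    # Assumption: a custom Pydantic model can be passed as graph_model to shape
    # the extracted knowledge structure, per the docstring above.
    await cognee.cognify(graph_model=Acquisition)


asyncio.run(main())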
@@ -274,7 +272,6 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents,
max_chunk_size=chunk_size or get_max_chunk_tokens(),
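After the removal, the head of the default task list reads roughly as below. This is a reconstruction from the visible hunk only; the later tasks (graph extraction, summarization, data-point insertion, per the docstring above) and the remaining chunking arguments are elided because the diff context cuts off, and all names are already in scope in this module.

default_tasks = [
    Task(classify_documents),
    Task(
        extract_chunks_from_documents,
        max_chunk_size=chunk_size or get_max_chunk_tokens(),
        # ... remaining arguments unchanged ...
    ),
    # ... later tasks unchanged; chunk extraction now follows classification directly.
]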
@@ -305,14 +302,13 @@ async def get_temporal_tasks(
The pipeline includes:
1. Document classification.
2. Dataset permission checks (requires "write" access).
3. Document chunking with a specified or default chunk size.
4. Event and timestamp extraction from chunks.
5. Knowledge graph extraction from events.
6. Batched insertion of data points.
2. Document chunking with a specified or default chunk size.
3. Event and timestamp extraction from chunks.
4. Knowledge graph extraction from events.
5. Batched insertion of data points.
Args:
user (User, optional): The user requesting task execution, used for permission checks.
user (User, optional): The user requesting task execution.
chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +321,6 @@ async def get_temporal_tasks(
temporal_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents,
max_chunk_size=chunk_size or get_max_chunk_tokens(),
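A hedged usage sketch for the revised temporal pipeline follows; `get_temporal_tasks` is the coroutine patched above, the argument values are illustrative, and `chunker` is omitted so the `TextChunker` default described in the docstring applies.

async def build_temporal_pipeline(user):
    # Returns the temporal task list: classification, chunk extraction, event and
    # timestamp extraction, graph extraction from events, batched insertion.
    return await get_temporal_tasks(
        user=user,  # still accepted, but no longer used for permission checks
        chunk_size=1024,
        chunks_per_batch=10,
    )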

View file

@@ -8,7 +8,6 @@ from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.shared.utils import send_telemetry
from cognee.tasks.documents import (
check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
cognee_config = get_cognify_config()
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
), # Extract text chunks based on the document type.

View file

@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
ontology_file_path=None,
) -> List[Task]:
"""Returns default tasks without summarization tasks."""
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
# Get base tasks (0=classify, 1=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
chunk_size: int = None, chunker=TextChunker, user=None
) -> List[Task]:
"""Returns default tasks with only chunk extraction and data points addition."""
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
# Get base tasks (0=classify, 1=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
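Because the permission task is gone from the default list, every index-based selection shifts down by one, which is what the two comment updates above reflect. Below is a hedged sketch of the resulting composition, assuming `get_default_tasks_by_indices` returns tasks in list order as those comments imply and that the selected tasks are simply followed by the data-point task; the function name `sketch_just_chunks_tasks` is invented for illustration.

async def sketch_just_chunks_tasks(chunk_size=None, chunker=None):
    # Index 0 = classify_documents, index 1 = extract_chunks_from_documents;
    # the old index 2 (the permission check) no longer exists.
    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
    add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
    # Assumption: the base tasks run first, then data-point insertion.
    return [*base_tasks, add_data_points_task]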

View file

@@ -1,3 +1,2 @@
from .classify_documents import classify_documents
from .extract_chunks_from_documents import extract_chunks_from_documents
from .check_permissions_on_dataset import check_permissions_on_dataset
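With the re-export dropped from the package `__init__`, the two remaining tasks are all that `cognee.tasks.documents` exposes; anything still importing the deleted symbol now fails at import time, as the commented line below illustrates.

from cognee.tasks.documents import classify_documents, extract_chunks_from_documents  # still valid

# from cognee.tasks.documents import check_permissions_on_dataset
# ^ raises ImportError after this commit, because the module was deleted.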

View file

@@ -1,26 +0,0 @@
from cognee.modules.data.processing.document_types import Document
from cognee.modules.users.permissions.methods import check_permission_on_dataset
from typing import List
async def check_permissions_on_dataset(
documents: List[Document], context: dict, user, permissions
) -> List[Document]:
"""
Validates a user's permissions on the dataset associated with the given documents.
Notes:
- This function assumes that `check_permission_on_dataset` raises an exception if the permission check fails.
- It validates multiple permissions sequentially against the same dataset.
- Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
"""
for permission in permissions:
await check_permission_on_dataset(
user,
permission,
# TODO: pass dataset through argument instead of context
context["dataset"].id,
)
return documents
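The deleted wrapper delegated to a lower-level helper that this commit leaves untouched. If a caller still needs the check, a hedged sketch of invoking that helper directly is shown below; the argument order and the `.id` access are taken from the deleted body, and the function name `ensure_write_access` is invented for illustration.

from cognee.modules.users.permissions.methods import check_permission_on_dataset


async def ensure_write_access(user, dataset):
    # The helper is expected to raise if the user lacks the permission,
    # which is what the deleted wrapper relied on.
    await check_permission_on_dataset(user, "write", dataset.id)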

View file

@@ -32,16 +32,13 @@ async def main():
print("Cognify process steps:")
print("1. Classifying the document: Determining the type and category of the input text.")
print(
"2. Checking permissions: Ensuring the user has the necessary rights to process the text."
"2. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
)
print(
"3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
"3. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
)
print("4. Adding data points: Storing the extracted chunks for processing.")
print(
"5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
)
print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n")
print("4. Summarizing text: Creating concise summaries of the content for quick insights.")
print("5. Adding data points: Storing the extracted chunks for processing.\n")
# Use LLMs and cognee to create knowledge graph
await cognee.cognify()

View file

@@ -591,7 +591,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "7c431fdef4921ae0",
"metadata": {
"ExecuteTime": {
@@ -609,7 +609,6 @@
"from cognee.modules.pipelines import run_tasks\n",
"from cognee.modules.users.models import User\n",
"from cognee.tasks.documents import (\n",
" check_permissions_on_dataset,\n",
" classify_documents,\n",
" extract_chunks_from_documents,\n",
")\n",
@@ -627,7 +626,6 @@
"\n",
" tasks = [\n",
" Task(classify_documents),\n",
" Task(check_permissions_on_dataset, user=user, permissions=[\"write\"]),\n",
" Task(\n",
" extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()\n",
" ), # Extract text chunks based on the document type.\n",