Removed check_permissions_on_dataset.py and related references

parent 487635b71b
commit 3acb581bd0
7 changed files with 19 additions and 58 deletions
@@ -19,7 +19,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -78,12 +77,11 @@ async def cognify(
 
     Processing Pipeline:
     1. **Document Classification**: Identifies document types and structures
-    2. **Permission Validation**: Ensures user has processing rights
-    3. **Text Chunking**: Breaks content into semantically meaningful segments
-    4. **Entity Extraction**: Identifies key concepts, people, places, organizations
-    5. **Relationship Detection**: Discovers connections between entities
-    6. **Graph Construction**: Builds semantic knowledge graph with embeddings
-    7. **Content Summarization**: Creates hierarchical summaries for navigation
+    2. **Text Chunking**: Breaks content into semantically meaningful segments
+    3. **Entity Extraction**: Identifies key concepts, people, places, organizations
+    4. **Relationship Detection**: Discovers connections between entities
+    5. **Graph Construction**: Builds semantic knowledge graph with embeddings
+    6. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Graph Model Customization:
     The `graph_model` parameter allows custom knowledge structures:
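For context, the docstring above describes the pipeline this commit trims. A minimal sketch of driving it through the public API, assuming the standard `cognee.add` ingestion entry point alongside the `cognee.cognify()` call used elsewhere in this diff:

    import asyncio

    import cognee

    async def main():
        # Ingest a document, then run the pipeline described above
        # (classification -> chunking -> extraction -> graph -> summaries).
        await cognee.add("Knowledge graphs link entities extracted from text.")
        await cognee.cognify()

    asyncio.run(main())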
@@ -274,7 +272,6 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
 
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
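The `Task(fn, **params)` entries above bind default keyword arguments to a pipeline step, which is why dropping the single `Task(check_permissions_on_dataset, ...)` entry removes the per-run check entirely. A simplified sketch of that wrapper pattern (assumed semantics, not cognee's actual `Task` implementation):

    from typing import Any, Awaitable, Callable

    class Task:
        """Wraps an async pipeline step plus the default kwargs it runs with."""

        def __init__(self, executable: Callable[..., Awaitable[Any]], **default_params: Any):
            self.executable = executable
            self.default_params = default_params

        async def run(self, *args: Any) -> Any:
            # The pipeline feeds each task the previous task's output.
            return await self.executable(*args, **self.default_params)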
@@ -305,14 +302,13 @@ async def get_temporal_tasks(
 
     The pipeline includes:
     1. Document classification.
-    2. Dataset permission checks (requires "write" access).
-    3. Document chunking with a specified or default chunk size.
-    4. Event and timestamp extraction from chunks.
-    5. Knowledge graph extraction from events.
-    6. Batched insertion of data points.
+    2. Document chunking with a specified or default chunk size.
+    3. Event and timestamp extraction from chunks.
+    4. Knowledge graph extraction from events.
+    5. Batched insertion of data points.
 
     Args:
-        user (User, optional): The user requesting task execution, used for permission checks.
+        user (User, optional): The user requesting task execution.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
         chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +321,6 @@ async def get_temporal_tasks(
 
     temporal_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
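A hedged usage sketch of the temporal pipeline after this change; the import path is assumed from this file's contents and is not confirmed by the diff:

    import asyncio

    from cognee.api.v1.cognify.cognify import get_temporal_tasks  # path assumed

    async def demo():
        tasks = await get_temporal_tasks(chunk_size=512)
        # The list now starts with classify_documents followed directly by
        # extract_chunks_from_documents; the permission task is gone.
        for task in tasks:
            print(task)

    asyncio.run(demo())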
@@ -8,7 +8,6 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
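Both call sites above rely on `get_default_tasks_by_indices` selecting positions from the default task list, which is why removing the permission task shifts `extract_chunks` from index 2 to index 1. A hypothetical sketch of that helper's likely shape; the real implementation lives elsewhere in the repo and the import path is assumed:

    # Hypothetical reconstruction of the index-based selection helper.
    from cognee.api.v1.cognify.cognify import get_default_tasks  # path assumed

    async def get_default_tasks_by_indices(indices, chunk_size=None, chunker=None):
        tasks = await get_default_tasks(chunk_size=chunk_size, chunker=chunker)
        # After this commit: 0 = classify_documents, 1 = extract_chunks_from_documents.
        return [tasks[index] for index in indices]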
@ -1,3 +1,2 @@
|
|||
from .classify_documents import classify_documents
|
||||
from .extract_chunks_from_documents import extract_chunks_from_documents
|
||||
from .check_permissions_on_dataset import check_permissions_on_dataset
|
||||
|
|
|
|||
|
|
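Since the re-export is gone from the package `__init__`, any downstream code still importing the task will now fail at import time. A hedged sketch of a compatibility guard a consumer might add; the `None` fallback is purely illustrative:

    try:
        from cognee.tasks.documents import check_permissions_on_dataset
    except ImportError:
        check_permissions_on_dataset = None  # removed by this commit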
@ -1,26 +0,0 @@
|
|||
from cognee.modules.data.processing.document_types import Document
|
||||
from cognee.modules.users.permissions.methods import check_permission_on_dataset
|
||||
from typing import List
|
||||
|
||||
|
||||
async def check_permissions_on_dataset(
|
||||
documents: List[Document], context: dict, user, permissions
|
||||
) -> List[Document]:
|
||||
"""
|
||||
Validates a user's permissions on a list of documents.
|
||||
|
||||
Notes:
|
||||
- This function assumes that `check_permission_on_documents` raises an exception if the permission check fails.
|
||||
- It is designed to validate multiple permissions in a sequential manner for the same set of documents.
|
||||
- Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
|
||||
"""
|
||||
|
||||
for permission in permissions:
|
||||
await check_permission_on_dataset(
|
||||
user,
|
||||
permission,
|
||||
# TODO: pass dataset through argument instead of context
|
||||
context["dataset"].id,
|
||||
)
|
||||
|
||||
return documents
|
||||
|
|
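The deleted task was a thin loop over `check_permission_on_dataset`, which itself remains in the codebase. Callers that still need the check can invoke it directly; a minimal sketch, with `user` and `dataset_id` as placeholders and the signature taken from the deleted file's own call:

    from cognee.modules.users.permissions.methods import check_permission_on_dataset

    async def ensure_write_access(user, dataset_id):
        # Raises if the user lacks the permission, mirroring the old task.
        for permission in ["write"]:
            await check_permission_on_dataset(user, permission, dataset_id)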
@@ -32,16 +32,13 @@ async def main():
     print("Cognify process steps:")
     print("1. Classifying the document: Determining the type and category of the input text.")
     print(
-        "2. Checking permissions: Ensuring the user has the necessary rights to process the text."
+        "2. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
     )
     print(
-        "3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
+        "3. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
     )
-    print("4. Adding data points: Storing the extracted chunks for processing.")
-    print(
-        "5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
-    )
-    print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n")
+    print("4. Summarizing text: Creating concise summaries of the content for quick insights.")
+    print("5. Adding data points: Storing the extracted chunks for processing.\n")
 
     # Use LLMs and cognee to create knowledge graph
     await cognee.cognify()
notebooks/cognee_demo.ipynb (vendored)
@@ -591,7 +591,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 11,
+    "execution_count": null,
     "id": "7c431fdef4921ae0",
     "metadata": {
      "ExecuteTime": {
@@ -609,7 +609,6 @@
     "from cognee.modules.pipelines import run_tasks\n",
     "from cognee.modules.users.models import User\n",
     "from cognee.tasks.documents import (\n",
-    "    check_permissions_on_dataset,\n",
     "    classify_documents,\n",
     "    extract_chunks_from_documents,\n",
     ")\n",
@@ -627,7 +626,6 @@
     "\n",
     "    tasks = [\n",
     "        Task(classify_documents),\n",
-    "        Task(check_permissions_on_dataset, user=user, permissions=[\"write\"]),\n",
     "        Task(\n",
     "            extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()\n",
     "        ),  # Extract text chunks based on the document type.\n",