Merge branch 'main' into merge-main-vol7

2025-12-11 19:11:24 +01:00 · 2025-12-11 19:11:24 +01:00 · 59f8d12fa3
commit 59f8d12fa3
parent 46ddd4fd12 75fea8dcc8
7 changed files with 19 additions and 58 deletions
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@ -20,7 +20,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
 from cognee.modules.users.models import User
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
    classify_documents,
    extract_chunks_from_documents,
 )
@ -79,12 +78,11 @@ async def cognify(
    Processing Pipeline:
        1. **Document Classification**: Identifies document types and structures
-        2. **Permission Validation**: Ensures user has processing rights
+        2. **Text Chunking**: Breaks content into semantically meaningful segments
-        3. **Text Chunking**: Breaks content into semantically meaningful segments
+        3. **Entity Extraction**: Identifies key concepts, people, places, organizations
-        4. **Entity Extraction**: Identifies key concepts, people, places, organizations
+        4. **Relationship Detection**: Discovers connections between entities
-        5. **Relationship Detection**: Discovers connections between entities
+        5. **Graph Construction**: Builds semantic knowledge graph with embeddings
-        6. **Graph Construction**: Builds semantic knowledge graph with embeddings
+        6. **Content Summarization**: Creates hierarchical summaries for navigation
-        7. **Content Summarization**: Creates hierarchical summaries for navigation
    Graph Model Customization:
        The `graph_model` parameter allows custom knowledge structures:
@ -278,7 +276,6 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
    default_tasks = [
        Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
        Task(
            extract_chunks_from_documents,
            max_chunk_size=chunk_size or get_max_chunk_tokens(),
@ -313,14 +310,13 @@ async def get_temporal_tasks(
    The pipeline includes:
    1. Document classification.
-    2. Dataset permission checks (requires "write" access).
+    2. Document chunking with a specified or default chunk size.
-    3. Document chunking with a specified or default chunk size.
+    3. Event and timestamp extraction from chunks.
-    4. Event and timestamp extraction from chunks.
+    4. Knowledge graph extraction from events.
-    5. Knowledge graph extraction from events.
+    5. Batched insertion of data points.
-    6. Batched insertion of data points.
    Args:
-        user (User, optional): The user requesting task execution, used for permission checks.
+        user (User, optional): The user requesting task execution.
        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
        chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@ -333,7 +329,6 @@ async def get_temporal_tasks(
    temporal_tasks = [
        Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
        Task(
            extract_chunks_from_documents,
            max_chunk_size=chunk_size or get_max_chunk_tokens(),
--- a/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
@ -8,7 +8,6 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
    classify_documents,
    extract_chunks_from_documents,
 )
@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
        cognee_config = get_cognify_config()
        default_tasks = [
            Task(classify_documents),
-            Task(check_permissions_on_dataset, user=user, permissions=["write"]),
            Task(
                extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
            ),  # Extract text chunks based on the document type.
--- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
@ -30,8 +30,8 @@ async def get_no_summary_tasks(
    ontology_file_path=None,
 ) -> List[Task]:
    """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
    ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
    chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
    """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
    add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
--- a/cognee/tasks/documents/__init__.py
+++ b/cognee/tasks/documents/__init__.py
@ -1,3 +1,2 @@
 from .classify_documents import classify_documents
 from .extract_chunks_from_documents import extract_chunks_from_documents
-from .check_permissions_on_dataset import check_permissions_on_dataset
--- a/cognee/tasks/documents/check_permissions_on_dataset.py
+++ b/cognee/tasks/documents/check_permissions_on_dataset.py
@ -1,26 +0,0 @@
-from cognee.modules.data.processing.document_types import Document
-from cognee.modules.users.permissions.methods import check_permission_on_dataset
-from typing import List
-async def check_permissions_on_dataset(
-    documents: List[Document], context: dict, user, permissions
-) -> List[Document]:
-    """
-    Validates a user's permissions on a list of documents.
-    Notes:
-        - This function assumes that `check_permission_on_documents` raises an exception if the permission check fails.
-        - It is designed to validate multiple permissions in a sequential manner for the same set of documents.
-        - Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
-    """
-    for permission in permissions:
-        await check_permission_on_dataset(
-            user,
-            permission,
-            # TODO: pass dataset through argument instead of context
-            context["dataset"].id,
-        )
-    return documents
--- a/examples/python/simple_example.py
+++ b/examples/python/simple_example.py
@ -32,16 +32,13 @@ async def main():
    print("Cognify process steps:")
    print("1. Classifying the document: Determining the type and category of the input text.")
    print(
-        "2. Checking permissions: Ensuring the user has the necessary rights to process the text."
+        "2. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
    )
    print(
-        "3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis."
+        "3. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
    )
-    print("4. Adding data points: Storing the extracted chunks for processing.")
+    print("4. Summarizing text: Creating concise summaries of the content for quick insights.")
-    print(
+    print("5. Adding data points: Storing the extracted chunks for processing.\n")
-        "5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph."
-    )
-    print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n")
    # Use LLMs and cognee to create knowledge graph
    await cognee.cognify()
--- a/notebooks/cognee_demo.ipynb
+++ b/notebooks/cognee_demo.ipynb
@ -591,7 +591,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
   "id": "7c431fdef4921ae0",
   "metadata": {
    "ExecuteTime": {
@ -609,7 +609,6 @
    "from cognee.modules.pipelines import run_tasks\n",
    "from cognee.modules.users.models import User\n",
    "from cognee.tasks.documents import (\n",
-    "    check_permissions_on_dataset,\n",
    "    classify_documents,\n",
    "    extract_chunks_from_documents,\n",
    ")\n",
@ -627,7 +626,6 @
    "\n",
    "        tasks = [\n",
    "            Task(classify_documents),\n",
-    "            Task(check_permissions_on_dataset, user=user, permissions=[\"write\"]),\n",
    "            Task(\n",
    "                extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()\n",
    "            ),  # Extract text chunks based on the document type.\n",