From a210bd59054dd353675589c63e57fe9d7349b766 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 15 Oct 2025 20:24:36 +0200
Subject: [PATCH] refactor: rename chunk_batch_size to chunks_per_batch

---
 cognee/api/v1/cognify/cognify.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index e0f6253d8..1d5c36a3c 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -44,7 +44,7 @@ async def cognify(
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
-    chunk_batch_size: int = None,
+    chunks_per_batch: int = None,
     config: Config = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
@@ -106,7 +106,7 @@ async def cognify(
             Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
             Default limits: ~512-8192 tokens depending on models.
             Smaller chunks = more granular but potentially fragmented knowledge.
-        chunk_batch_size: Number of chunks to be processed in a single batch in Cognify tasks.
+        chunks_per_batch: Number of chunks to be processed in a single batch in Cognify tasks.
         vector_db_config: Custom vector database configuration for embeddings storage.
         graph_db_config: Custom graph database configuration for relationship storage.
         run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -212,7 +212,7 @@ async def cognify(
 
     if temporal_cognify:
         tasks = await get_temporal_tasks(
-            user=user, chunker=chunker, chunk_size=chunk_size, chunk_batch_size=chunk_batch_size
+            user=user, chunker=chunker, chunk_size=chunk_size, chunks_per_batch=chunks_per_batch
         )
     else:
         tasks = await get_default_tasks(
@@ -222,7 +222,7 @@ async def cognify(
             chunk_size=chunk_size,
             config=config,
             custom_prompt=custom_prompt,
-            chunk_batch_size=chunk_batch_size,
+            chunks_per_batch=chunks_per_batch,
         )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -248,7 +248,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunk_size: int = None,
     config: Config = None,
     custom_prompt: Optional[str] = None,
-    chunk_batch_size: int = 100,
+    chunks_per_batch: int = 100,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -267,8 +267,8 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
         }
 
-    if chunk_batch_size is None:
-        chunk_batch_size = 100
+    if chunks_per_batch is None:
+        chunks_per_batch = 100
 
     default_tasks = [
         Task(classify_documents),
@@ -283,20 +283,20 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             graph_model=graph_model,
             config=config,
             custom_prompt=custom_prompt,
-            task_config={"batch_size": chunk_batch_size},
+            task_config={"batch_size": chunks_per_batch},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
-            task_config={"batch_size": chunk_batch_size},
+            task_config={"batch_size": chunks_per_batch},
         ),
-        Task(add_data_points, task_config={"batch_size": chunk_batch_size}),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]
 
     return default_tasks
 
 
 async def get_temporal_tasks(
-    user: User = None, chunker=TextChunker, chunk_size: int = None, chunk_batch_size: int = 10
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
 ) -> list[Task]:
     """
     Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -313,13 +313,13 @@ async def get_temporal_tasks(
         user (User, optional): The user requesting task execution, used for permission checks.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
-        chunk_batch_size (int, optional): Number of chunks to process in a single batch in Cognify
+        chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
 
     Returns:
         list[Task]: A list of Task objects representing the temporal processing pipeline.
     """
-    if chunk_batch_size is None:
-        chunk_batch_size = 10
+    if chunks_per_batch is None:
+        chunks_per_batch = 10
 
     temporal_tasks = [
         Task(classify_documents),
@@ -329,9 +329,9 @@ async def get_temporal_tasks(
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
             chunker=chunker,
         ),
-        Task(extract_events_and_timestamps, task_config={"batch_size": chunk_batch_size}),
+        Task(extract_events_and_timestamps, task_config={"batch_size": chunks_per_batch}),
         Task(extract_knowledge_graph_from_events),
-        Task(add_data_points, task_config={"batch_size": chunk_batch_size}),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
    ]

     return temporal_tasks
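Usage example (a minimal sketch, not part of the patch): after this rename, callers of cognee.cognify pass chunks_per_batch where they previously passed chunk_batch_size. The sample text and the add() call below are illustrative assumptions, not taken from this diff.

    import asyncio

    import cognee

    async def main():
        # Hypothetical sample input; any ingested dataset works here.
        await cognee.add("Cognee turns documents into a knowledge graph.")
        # Renamed keyword argument: process 50 chunks per batch in each
        # Cognify task (formerly chunk_batch_size=50; defaults apply if omitted).
        await cognee.cognify(chunks_per_batch=50)

    asyncio.run(main())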