From d1b13b113f9b33c6bc537be180e0362c47ae7a0c Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 28 Dec 2025 21:06:42 +0100 Subject: [PATCH 1/5] added batch size as an env variable option --- cognee/api/v1/cognify/cognify.py | 17 +++++++++++------ .../v1/cognify/routers/get_cognify_router.py | 6 ++++++ cognee/cli/commands/cognify_command.py | 6 ++++++ cognee/modules/cognify/config.py | 2 ++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index ffc903d68..bbe00c35f 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -252,7 +252,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's chunk_size: int = None, config: Config = None, custom_prompt: Optional[str] = None, - chunks_per_batch: int = 100, + chunks_per_batch: int = None, **kwargs, ) -> list[Task]: if config is None: @@ -272,12 +272,14 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's "ontology_config": {"ontology_resolver": get_default_ontology_resolver()} } - if chunks_per_batch is None: - chunks_per_batch = 100 - cognify_config = get_cognify_config() embed_triplets = cognify_config.triplet_embedding + if chunks_per_batch is None: + chunks_per_batch = ( + cognify_config.chunks_per_batch if cognify_config.chunks_per_batch is not None else 100 + ) + default_tasks = [ Task(classify_documents), Task( @@ -308,7 +310,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's async def get_temporal_tasks( - user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10 + user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None ) -> list[Task]: """ Builds and returns a list of temporal processing tasks to be executed in sequence. @@ -330,7 +332,10 @@ async def get_temporal_tasks( list[Task]: A list of Task objects representing the temporal processing pipeline. """ if chunks_per_batch is None: - chunks_per_batch = 10 + from cognee.modules.cognify.config import get_cognify_config + + configured = get_cognify_config().chunks_per_batch + chunks_per_batch = configured if configured is not None else 10 temporal_tasks = [ Task(classify_documents), diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py index a499b3ca3..0e2bf2bda 100644 --- a/cognee/api/v1/cognify/routers/get_cognify_router.py +++ b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -46,6 +46,11 @@ class CognifyPayloadDTO(InDTO): examples=[[]], description="Reference to one or more previously uploaded ontologies", ) + chunks_per_batch: Optional[int] = Field( + default=None, + description="Number of chunks to process per task batch in Cognify (overrides default).", + examples=[10, 20, 50, 100], + ) def get_cognify_router() -> APIRouter: @@ -146,6 +151,7 @@ def get_cognify_router() -> APIRouter: config=config_to_use, run_in_background=payload.run_in_background, custom_prompt=payload.custom_prompt, + chunks_per_batch=payload.chunks_per_batch, ) # If any cognify run errored return JSONResponse with proper error status code diff --git a/cognee/cli/commands/cognify_command.py b/cognee/cli/commands/cognify_command.py index b89c1f70e..6c278b4dc 100644 --- a/cognee/cli/commands/cognify_command.py +++ b/cognee/cli/commands/cognify_command.py @@ -62,6 +62,11 @@ After successful cognify processing, use `cognee search` to query the knowledge parser.add_argument( "--verbose", "-v", action="store_true", help="Show detailed progress information" ) + parser.add_argument( + "--chunks-per-batch", + type=int, + help="Number of chunks to process per task batch (try 50 for large single documents).", + ) def execute(self, args: argparse.Namespace) -> None: try: @@ -111,6 +116,7 @@ After successful cognify processing, use `cognee search` to query the knowledge chunk_size=args.chunk_size, ontology_file_path=args.ontology_file, run_in_background=args.background, + chunks_per_batch=args.chunks_per_batch, ) return result except Exception as e: diff --git a/cognee/modules/cognify/config.py b/cognee/modules/cognify/config.py index ec03225e8..223392375 100644 --- a/cognee/modules/cognify/config.py +++ b/cognee/modules/cognify/config.py @@ -9,6 +9,7 @@ class CognifyConfig(BaseSettings): classification_model: object = DefaultContentPrediction summarization_model: object = SummarizedContent triplet_embedding: bool = False + chunks_per_batch: Optional[int] = None model_config = SettingsConfigDict(env_file=".env", extra="allow") def to_dict(self) -> dict: @@ -16,6 +17,7 @@ class CognifyConfig(BaseSettings): "classification_model": self.classification_model, "summarization_model": self.summarization_model, "triplet_embedding": self.triplet_embedding, + "chunks_per_batch": self.chunks_per_batch, } From e38c33c1b560fe6015a917cff758d087a196abf2 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 11 Jan 2026 16:15:29 +0100 Subject: [PATCH 2/5] fix: handle missing chunks_per_batch attribute in cognify CLI command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix AttributeError when args.chunks_per_batch is not present in the argparse.Namespace object. Use getattr() with default value of None to safely access the optional chunks_per_batch parameter. This resolves test failures in test_cli_edge_cases.py where Namespace objects were created without the chunks_per_batch attribute. Changes: - Use getattr(args, 'chunks_per_batch', None) instead of direct access - Update test assertion to expect chunks_per_batch=None parameter 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- cognee/cli/commands/cognify_command.py | 2 +- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/cli/commands/cognify_command.py b/cognee/cli/commands/cognify_command.py index 6c278b4dc..bae03eda2 100644 --- a/cognee/cli/commands/cognify_command.py +++ b/cognee/cli/commands/cognify_command.py @@ -116,7 +116,7 @@ After successful cognify processing, use `cognee search` to query the knowledge chunk_size=args.chunk_size, ontology_file_path=args.ontology_file, run_in_background=args.background, - chunks_per_batch=args.chunks_per_batch, + chunks_per_batch=getattr(args, 'chunks_per_batch', None), ) return result except Exception as e: diff --git a/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py b/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py index ca27c0f67..ae9e99ac6 100644 --- a/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +++ b/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py @@ -373,6 +373,7 @@ class TestCognifyCommandEdgeCases: ontology_file_path=None, chunker=TextChunker, run_in_background=False, + chunks_per_batch=None, ) From 7d3450cb083f147743f9533fae7f8c57ab2c5119 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 11 Jan 2026 16:18:13 +0100 Subject: [PATCH 3/5] style: apply ruff formatting to use double quotes --- cognee/cli/commands/cognify_command.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/cli/commands/cognify_command.py b/cognee/cli/commands/cognify_command.py index bae03eda2..c310b88b7 100644 --- a/cognee/cli/commands/cognify_command.py +++ b/cognee/cli/commands/cognify_command.py @@ -116,7 +116,7 @@ After successful cognify processing, use `cognee search` to query the knowledge chunk_size=args.chunk_size, ontology_file_path=args.ontology_file, run_in_background=args.background, - chunks_per_batch=getattr(args, 'chunks_per_batch', None), + chunks_per_batch=getattr(args, "chunks_per_batch", None), ) return result except Exception as e: From 98394fc2641c7c10d0272cd0cb583b89a9428a58 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 11 Jan 2026 16:18:29 +0100 Subject: [PATCH 4/5] docs: add code style rules for double quotes and pre-commit --- CLAUDE.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 7ac4f01d0..4303582c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -427,10 +427,12 @@ git checkout -b feature/your-feature-name ## Code Style -- Ruff for linting and formatting (configured in `pyproject.toml`) -- Line length: 100 characters -- Pre-commit hooks run ruff automatically -- Type hints encouraged (mypy checks enabled) +- **Formatter**: Ruff (configured in `pyproject.toml`) +- **Line length**: 100 characters +- **String quotes**: Use double quotes `"` not single quotes `'` (enforced by ruff-format) +- **Pre-commit hooks**: Run ruff linting and formatting automatically +- **Type hints**: Encouraged (mypy checks enabled) +- **Important**: Always run `pre-commit run --all-files` before committing to catch formatting issues ## Testing Strategy From b7d5bf5e9c11cd9822e04706699153e5a4e2f98c Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 11 Jan 2026 16:22:06 +0100 Subject: [PATCH 5/5] test: update cognify test assertions to include chunks_per_batch parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update all cognify test assertions to expect the chunks_per_batch=None parameter that was added to the CLI command. This fixes three failing tests: - test_execute_basic_cognify - test_cognify_invalid_chunk_size - test_cognify_nonexistent_ontology_file 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py | 1 + cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py b/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py index 7654a781a..1301a8eaa 100644 --- a/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +++ b/cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py @@ -238,6 +238,7 @@ class TestCognifyCommand: ontology_file_path=None, chunker=TextChunker, run_in_background=False, + chunks_per_batch=None, ) @patch("cognee.cli.commands.cognify_command.asyncio.run") diff --git a/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py b/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py index ae9e99ac6..466a9e458 100644 --- a/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +++ b/cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py @@ -262,6 +262,7 @@ class TestCognifyCommandEdgeCases: ontology_file_path=None, chunker=TextChunker, run_in_background=False, + chunks_per_batch=None, ) @patch("cognee.cli.commands.cognify_command.asyncio.run", side_effect=_mock_run) @@ -295,6 +296,7 @@ class TestCognifyCommandEdgeCases: ontology_file_path="/nonexistent/path/ontology.owl", chunker=TextChunker, run_in_background=False, + chunks_per_batch=None, ) @patch("cognee.cli.commands.cognify_command.asyncio.run")