Merge branch 'dev' into feature/cog-3725-add-guide-example-codes-to-main-repo

This commit is contained in:
Milenko Gavrić 2026-01-16 15:32:10 +01:00 committed by GitHub
commit 17dbf2137b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 35 additions and 10 deletions

View file

@@ -427,10 +427,12 @@ git checkout -b feature/your-feature-name
## Code Style ## Code Style
- Ruff for linting and formatting (configured in `pyproject.toml`) - **Formatter**: Ruff (configured in `pyproject.toml`)
- Line length: 100 characters - **Line length**: 100 characters
- Pre-commit hooks run ruff automatically - **String quotes**: Use double quotes `"` not single quotes `'` (enforced by ruff-format)
- Type hints encouraged (mypy checks enabled) - **Pre-commit hooks**: Run ruff linting and formatting automatically
- **Type hints**: Encouraged (mypy checks enabled)
- **Important**: Always run `pre-commit run --all-files` before committing to catch formatting issues
## Testing Strategy ## Testing Strategy

View file

@@ -252,7 +252,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
chunk_size: int = None, chunk_size: int = None,
config: Config = None, config: Config = None,
custom_prompt: Optional[str] = None, custom_prompt: Optional[str] = None,
chunks_per_batch: int = 100, chunks_per_batch: int = None,
**kwargs, **kwargs,
) -> list[Task]: ) -> list[Task]:
if config is None: if config is None:
@@ -272,12 +272,14 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()} "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
} }
if chunks_per_batch is None:
chunks_per_batch = 100
cognify_config = get_cognify_config() cognify_config = get_cognify_config()
embed_triplets = cognify_config.triplet_embedding embed_triplets = cognify_config.triplet_embedding
if chunks_per_batch is None:
chunks_per_batch = (
cognify_config.chunks_per_batch if cognify_config.chunks_per_batch is not None else 100
)
default_tasks = [ default_tasks = [
Task(classify_documents), Task(classify_documents),
Task( Task(
@@ -308,7 +310,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
async def get_temporal_tasks( async def get_temporal_tasks(
user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10 user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None
) -> list[Task]: ) -> list[Task]:
""" """
Builds and returns a list of temporal processing tasks to be executed in sequence. Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -330,7 +332,10 @@ async def get_temporal_tasks(
list[Task]: A list of Task objects representing the temporal processing pipeline. list[Task]: A list of Task objects representing the temporal processing pipeline.
""" """
if chunks_per_batch is None: if chunks_per_batch is None:
chunks_per_batch = 10 from cognee.modules.cognify.config import get_cognify_config
configured = get_cognify_config().chunks_per_batch
chunks_per_batch = configured if configured is not None else 10
temporal_tasks = [ temporal_tasks = [
Task(classify_documents), Task(classify_documents),

View file

@@ -46,6 +46,11 @@ class CognifyPayloadDTO(InDTO):
examples=[[]], examples=[[]],
description="Reference to one or more previously uploaded ontologies", description="Reference to one or more previously uploaded ontologies",
) )
chunks_per_batch: Optional[int] = Field(
default=None,
description="Number of chunks to process per task batch in Cognify (overrides default).",
examples=[10, 20, 50, 100],
)
def get_cognify_router() -> APIRouter: def get_cognify_router() -> APIRouter:
@@ -146,6 +151,7 @@ def get_cognify_router() -> APIRouter:
config=config_to_use, config=config_to_use,
run_in_background=payload.run_in_background, run_in_background=payload.run_in_background,
custom_prompt=payload.custom_prompt, custom_prompt=payload.custom_prompt,
chunks_per_batch=payload.chunks_per_batch,
) )
# If any cognify run errored return JSONResponse with proper error status code # If any cognify run errored return JSONResponse with proper error status code

View file

@@ -62,6 +62,11 @@ After successful cognify processing, use `cognee search` to query the knowledge
parser.add_argument( parser.add_argument(
"--verbose", "-v", action="store_true", help="Show detailed progress information" "--verbose", "-v", action="store_true", help="Show detailed progress information"
) )
parser.add_argument(
"--chunks-per-batch",
type=int,
help="Number of chunks to process per task batch (try 50 for large single documents).",
)
def execute(self, args: argparse.Namespace) -> None: def execute(self, args: argparse.Namespace) -> None:
try: try:
@@ -111,6 +116,7 @@ After successful cognify processing, use `cognee search` to query the knowledge
chunk_size=args.chunk_size, chunk_size=args.chunk_size,
ontology_file_path=args.ontology_file, ontology_file_path=args.ontology_file,
run_in_background=args.background, run_in_background=args.background,
chunks_per_batch=getattr(args, "chunks_per_batch", None),
) )
return result return result
except Exception as e: except Exception as e:

View file

@@ -9,6 +9,7 @@ class CognifyConfig(BaseSettings):
classification_model: object = DefaultContentPrediction classification_model: object = DefaultContentPrediction
summarization_model: object = SummarizedContent summarization_model: object = SummarizedContent
triplet_embedding: bool = False triplet_embedding: bool = False
chunks_per_batch: Optional[int] = None
model_config = SettingsConfigDict(env_file=".env", extra="allow") model_config = SettingsConfigDict(env_file=".env", extra="allow")
def to_dict(self) -> dict: def to_dict(self) -> dict:
@@ -16,6 +17,7 @@ class CognifyConfig(BaseSettings):
"classification_model": self.classification_model, "classification_model": self.classification_model,
"summarization_model": self.summarization_model, "summarization_model": self.summarization_model,
"triplet_embedding": self.triplet_embedding, "triplet_embedding": self.triplet_embedding,
"chunks_per_batch": self.chunks_per_batch,
} }

View file

@@ -238,6 +238,7 @@ class TestCognifyCommand:
ontology_file_path=None, ontology_file_path=None,
chunker=TextChunker, chunker=TextChunker,
run_in_background=False, run_in_background=False,
chunks_per_batch=None,
) )
@patch("cognee.cli.commands.cognify_command.asyncio.run") @patch("cognee.cli.commands.cognify_command.asyncio.run")

View file

@@ -262,6 +262,7 @@ class TestCognifyCommandEdgeCases:
ontology_file_path=None, ontology_file_path=None,
chunker=TextChunker, chunker=TextChunker,
run_in_background=False, run_in_background=False,
chunks_per_batch=None,
) )
@patch("cognee.cli.commands.cognify_command.asyncio.run", side_effect=_mock_run) @patch("cognee.cli.commands.cognify_command.asyncio.run", side_effect=_mock_run)
@@ -295,6 +296,7 @@ class TestCognifyCommandEdgeCases:
ontology_file_path="/nonexistent/path/ontology.owl", ontology_file_path="/nonexistent/path/ontology.owl",
chunker=TextChunker, chunker=TextChunker,
run_in_background=False, run_in_background=False,
chunks_per_batch=None,
) )
@patch("cognee.cli.commands.cognify_command.asyncio.run") @patch("cognee.cli.commands.cognify_command.asyncio.run")
@@ -373,6 +375,7 @@ class TestCognifyCommandEdgeCases:
ontology_file_path=None, ontology_file_path=None,
chunker=TextChunker, chunker=TextChunker,
run_in_background=False, run_in_background=False,
chunks_per_batch=None,
) )