fix: Fixes PDF cut type None bug by introducing default cut type (#1044)

…cut type

<!-- .github/pull_request_template.md -->

## Description
 Fixes PDF cut type None bug by introducing default cut type

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Boris 2025-07-02 10:54:13 +02:00 committed by GitHub
commit b8ea699abe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -31,7 +31,7 @@ def chunk_by_paragraph(
current_chunk = ""
chunk_index = 0
paragraph_ids = []
last_cut_type = None
last_cut_type = "default"
current_chunk_size = 0
for paragraph_id, sentence, sentence_size, end_type in chunk_by_sentence(
@ -77,6 +77,9 @@ def chunk_by_paragraph(
current_chunk_size = 0
chunk_index += 1
if not end_type:
end_type = "default"
last_cut_type = end_type
# Yield any remaining text