fix: Fixes pdf cut type None bug by introducing default cut type (#1044)
…cut type <!-- .github/pull_request_template.md --> ## Description Fixes pdf cut type None bug by introducing default cut type ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
commit
b8ea699abe
1 changed file with 4 additions and 1 deletion
|
|
@ -31,7 +31,7 @@ def chunk_by_paragraph(
|
|||
current_chunk = ""
|
||||
chunk_index = 0
|
||||
paragraph_ids = []
|
||||
last_cut_type = None
|
||||
last_cut_type = "default"
|
||||
current_chunk_size = 0
|
||||
|
||||
for paragraph_id, sentence, sentence_size, end_type in chunk_by_sentence(
|
||||
|
|
@ -77,6 +77,9 @@ def chunk_by_paragraph(
|
|||
current_chunk_size = 0
|
||||
chunk_index += 1
|
||||
|
||||
if not end_type:
|
||||
end_type = "default"
|
||||
|
||||
last_cut_type = end_type
|
||||
|
||||
# Yield any remaining text
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue