Fix chunk_index bug in chunk_by_paragraph

This commit is contained in:
Leon Luithlen 2024-11-14 13:50:40 +01:00
parent f2206a09c0
commit 6721eaee83

View file

@@ -53,9 +53,9 @@ def chunk_by_paragraph(data: str, paragraph_length: int = 1024, batch_paragraphs
"cut_type": end_type
}
yield chunk_dict
chunk_index += 1
current_chunk = ""
current_word_count = 0
chunk_index = 0
last_cut_type = end_type
last_paragraph_id = paragraph_id