Return sentence_cut instead of word in chunk_by_paragraph
This commit is contained in:
parent
8b681529b1
commit
d6a6a9eaba
2 changed files with 2 additions and 2 deletions
|
|
@@ -67,7 +67,7 @@ def chunk_by_paragraph(data: str, paragraph_length: int = 1024, batch_paragraphs
|
|||
"chunk_id": uuid5(NAMESPACE_OID, current_chunk),
|
||||
"paragraph_ids": paragraph_ids,
|
||||
"chunk_index": chunk_index,
|
||||
-"cut_type": last_cut_type
|
||||
+"cut_type": "sentence_cut" if last_cut_type == "word" else last_cut_type
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@@ -32,7 +32,7 @@ GROUND_TRUTH = {
|
|||
{
|
||||
"text": "\nThird paragraph is cut and is missing the dot at the end",
|
||||
"word_count": 12,
|
||||
-"cut_type": "word",
|
||||
+"cut_type": "sentence_cut",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue