remove delimiter for naive parser

This commit is contained in:
Kevin Hu 2024-03-04 17:07:17 +08:00
parent 0cc0a91cbf
commit 4a053b8c08

View file

@@ -246,7 +246,7 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。"):
tk_nums[-1] += tnum tk_nums[-1] += tnum
for sec, pos in sections: for sec, pos in sections:
add_chunk(sec[s: e], pos) add_chunk(sec, pos)
continue continue
s, e = 0, 1 s, e = 0, 1
while e < len(sec): while e < len(sec):