Handle Azure models as well
This commit is contained in:
parent
a774191ed3
commit
fb13a1b61a
1 changed file with 2 additions and 0 deletions
|
|
@@ -29,6 +29,8 @@ def chunk_by_paragraph(
|
||||||
for paragraph_id, sentence, word_count, end_type in chunk_by_sentence(data, maximum_length=paragraph_length):
|
for paragraph_id, sentence, word_count, end_type in chunk_by_sentence(data, maximum_length=paragraph_length):
|
||||||
# Check if this sentence would exceed length limit
|
# Check if this sentence would exceed length limit
|
||||||
if embedding_model:
|
if embedding_model:
|
||||||
|
if embedding_model.startswith("azure/"):
|
||||||
|
embedding_model = embedding_model.split("/")[-1]
|
||||||
tokenizer = tiktoken.encoding_for_model(embedding_model)
|
tokenizer = tiktoken.encoding_for_model(embedding_model)
|
||||||
token_count = len(tokenizer.encode(sentence))
|
token_count = len(tokenizer.encode(sentence))
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue