From fb13a1b61a42c6b02ad85e70644c73aef722c1d7 Mon Sep 17 00:00:00 2001
From: Rita Aleksziev
Date: Tue, 7 Jan 2025 15:00:58 +0100
Subject: [PATCH] Handle azure models as well

---
 cognee/tasks/chunks/chunk_by_paragraph.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cognee/tasks/chunks/chunk_by_paragraph.py b/cognee/tasks/chunks/chunk_by_paragraph.py
index 2bbd9689f..b3c191e29 100644
--- a/cognee/tasks/chunks/chunk_by_paragraph.py
+++ b/cognee/tasks/chunks/chunk_by_paragraph.py
@@ -29,6 +29,8 @@ def chunk_by_paragraph(
     for paragraph_id, sentence, word_count, end_type in chunk_by_sentence(data, maximum_length=paragraph_length):
         # Check if this sentence would exceed length limit
         if embedding_model:
+            if embedding_model.startswith("azure/"):
+                embedding_model = embedding_model.split("/")[-1]
             tokenizer = tiktoken.encoding_for_model(embedding_model)
             token_count = len(tokenizer.encode(sentence))
         else:
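
Note (not part of the patch): a minimal standalone sketch of the normalization this change introduces. The helper name count_tokens and the model identifier "azure/text-embedding-3-large" are hypothetical, not taken from the cognee codebase. The assumption behind the patch is that tiktoken.encoding_for_model only recognizes base OpenAI model names, so a litellm-style "azure/" provider prefix has to be stripped before the encoding lookup.

    import tiktoken

    def count_tokens(sentence: str, embedding_model: str) -> int:
        # litellm-style identifiers carry a provider prefix, e.g. "azure/<model>".
        # tiktoken only knows base OpenAI model names, so strip the prefix
        # before looking up the encoding (the same normalization the patch adds).
        if embedding_model.startswith("azure/"):
            embedding_model = embedding_model.split("/")[-1]
        tokenizer = tiktoken.encoding_for_model(embedding_model)
        return len(tokenizer.encode(sentence))

    # Hypothetical usage: both identifiers resolve to the same encoding.
    print(count_tokens("Paragraph chunking example.", "text-embedding-3-large"))
    print(count_tokens("Paragraph chunking example.", "azure/text-embedding-3-large"))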