Fix: avoid mixing different embedding models in document parsing (#8260)
### What problem does this PR solve? Fixes the mixing of different embedding models during document parsing. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
parent
dabbc852c8
commit
0fa1a1469e
1 changed file with 5 additions and 3 deletions
|
|
@ -169,7 +169,7 @@ class TenantLLMService(CommonService):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
llm_map = {
|
llm_map = {
|
||||||
LLMType.EMBEDDING.value: tenant.embd_id,
|
LLMType.EMBEDDING.value: tenant.embd_id if not llm_name else llm_name,
|
||||||
LLMType.SPEECH2TEXT.value: tenant.asr_id,
|
LLMType.SPEECH2TEXT.value: tenant.asr_id,
|
||||||
LLMType.IMAGE2TEXT.value: tenant.img2txt_id,
|
LLMType.IMAGE2TEXT.value: tenant.img2txt_id,
|
||||||
LLMType.CHAT.value: tenant.llm_id if not llm_name else llm_name,
|
LLMType.CHAT.value: tenant.llm_id if not llm_name else llm_name,
|
||||||
|
|
@ -235,7 +235,8 @@ class LLMBundle:
|
||||||
generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts})
|
generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts})
|
||||||
|
|
||||||
embeddings, used_tokens = self.mdl.encode(texts)
|
embeddings, used_tokens = self.mdl.encode(texts)
|
||||||
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
|
llm_name = getattr(self, "llm_name", None)
|
||||||
|
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
||||||
logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
||||||
|
|
||||||
if self.langfuse:
|
if self.langfuse:
|
||||||
|
|
@ -248,7 +249,8 @@ class LLMBundle:
|
||||||
generation = self.trace.generation(name="encode_queries", model=self.llm_name, input={"query": query})
|
generation = self.trace.generation(name="encode_queries", model=self.llm_name, input={"query": query})
|
||||||
|
|
||||||
emd, used_tokens = self.mdl.encode_queries(query)
|
emd, used_tokens = self.mdl.encode_queries(query)
|
||||||
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
|
llm_name = getattr(self, "llm_name", None)
|
||||||
|
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
|
||||||
logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
||||||
|
|
||||||
if self.langfuse:
|
if self.langfuse:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue