Fix: tokenizer issue.

This commit is contained in:
Kevin Hu 2025-12-11 14:33:42 +08:00
parent bd0eff2954
commit 89b80c66d0
2 changed files with 17 additions and 0 deletions

View file

@ -33,6 +33,7 @@ from api.utils.web_utils import CONTENT_TYPE_MAP
from common import settings from common import settings
from common.constants import RetCode from common.constants import RetCode
@manager.route('/file/upload', methods=['POST']) # noqa: F821 @manager.route('/file/upload', methods=['POST']) # noqa: F821
@token_required @token_required
async def upload(tenant_id): async def upload(tenant_id):

View file

@ -33,6 +33,22 @@ class RagTokenizer(infinity.rag_tokenizer.RagTokenizer):
return super().fine_grained_tokenize(tks) return super().fine_grained_tokenize(tks)
def is_chinese(s):
    """Forward ``s`` to ``infinity.rag_tokenizer.is_chinese`` and return its result.

    This is a pure pass-through: no validation or conversion happens here.
    NOTE(review): the exact contract (accepted input, return type) is defined
    by the upstream ``infinity`` implementation -- confirm there.
    """
    upstream_check = infinity.rag_tokenizer.is_chinese
    return upstream_check(s)
def is_number(s):
    """Forward ``s`` to ``infinity.rag_tokenizer.is_number`` and return its result.

    This is a pure pass-through: no validation or conversion happens here.
    NOTE(review): the exact contract (accepted input, return type) is defined
    by the upstream ``infinity`` implementation -- confirm there.
    """
    upstream_check = infinity.rag_tokenizer.is_number
    return upstream_check(s)
def is_alphabet(s):
    """Forward ``s`` to ``infinity.rag_tokenizer.is_alphabet`` and return its result.

    This is a pure pass-through: no validation or conversion happens here.
    NOTE(review): the exact contract (accepted input, return type) is defined
    by the upstream ``infinity`` implementation -- confirm there.
    """
    upstream_check = infinity.rag_tokenizer.is_alphabet
    return upstream_check(s)
def naive_qie(txt):
    """Forward ``txt`` to ``infinity.rag_tokenizer.naive_qie`` and return its result.

    This is a pure pass-through: no validation or conversion happens here.
    NOTE(review): what the upstream function produces for ``txt`` is not
    visible from this module -- confirm against the ``infinity`` implementation.
    """
    upstream_split = infinity.rag_tokenizer.naive_qie
    return upstream_split(txt)
# Shared module-level tokenizer instance; the two aliases below expose its
# bound methods as plain module-level callables.
tokenizer = RagTokenizer()
tokenize = tokenizer.tokenize
fine_grained_tokenize = tokenizer.fine_grained_tokenize