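Fix: tokenizer issue — add module-level `is_chinese`, `is_number`, `is_alphabet`, and `naive_qie` wrappers that delegate to `infinity.rag_tokenizer`.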
This commit is contained in:
parent
bd0eff2954
commit
89b80c66d0
2 changed files with 17 additions and 0 deletions
|
|
@@ -33,6 +33,7 @@ from api.utils.web_utils import CONTENT_TYPE_MAP
|
||||||
from common import settings
|
from common import settings
|
||||||
from common.constants import RetCode
|
from common.constants import RetCode
|
||||||
|
|
||||||
|
|
||||||
@manager.route('/file/upload', methods=['POST']) # noqa: F821
|
@manager.route('/file/upload', methods=['POST']) # noqa: F821
|
||||||
@token_required
|
@token_required
|
||||||
async def upload(tenant_id):
|
async def upload(tenant_id):
|
||||||
|
|
|
||||||
|
|
@@ -33,6 +33,22 @@ class RagTokenizer(infinity.rag_tokenizer.RagTokenizer):
|
||||||
return super().fine_grained_tokenize(tks)
|
return super().fine_grained_tokenize(tks)
|
||||||
|
|
||||||
|
|
||||||
|
def is_chinese(s):
    """Thin pass-through to ``infinity.rag_tokenizer.is_chinese``.

    Forwards ``s`` unchanged and returns the delegate's result as-is.
    """
    delegate = infinity.rag_tokenizer.is_chinese
    return delegate(s)
|
||||||
|
|
||||||
|
|
||||||
|
def is_number(s):
    """Thin pass-through to ``infinity.rag_tokenizer.is_number``.

    Forwards ``s`` unchanged and returns the delegate's result as-is.
    """
    delegate = infinity.rag_tokenizer.is_number
    return delegate(s)
|
||||||
|
|
||||||
|
|
||||||
|
def is_alphabet(s):
    """Thin pass-through to ``infinity.rag_tokenizer.is_alphabet``.

    Forwards ``s`` unchanged and returns the delegate's result as-is.
    """
    delegate = infinity.rag_tokenizer.is_alphabet
    return delegate(s)
|
||||||
|
|
||||||
|
|
||||||
|
def naive_qie(txt):
    """Thin pass-through to ``infinity.rag_tokenizer.naive_qie``.

    Forwards ``txt`` unchanged and returns the delegate's result as-is.
    """
    delegate = infinity.rag_tokenizer.naive_qie
    return delegate(txt)
|
||||||
|
|
||||||
|
|
||||||
# Module-level RagTokenizer instance; `tokenize` and `fine_grained_tokenize`
# below are aliases bound to that instance's methods. Each statement shows up
# twice here because these are unchanged context lines in a side-by-side diff.
tokenizer = RagTokenizer()
|
tokenizer = RagTokenizer()
|
||||||
tokenize = tokenizer.tokenize
|
tokenize = tokenizer.tokenize
|
||||||
fine_grained_tokenize = tokenizer.fine_grained_tokenize
|
fine_grained_tokenize = tokenizer.fine_grained_tokenize
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue