Import rag_tokenizer inside the functions that use it to fix a ruff check issue
This commit is contained in:
parent
44ed79a64b
commit
e147fb761e
1 changed file with 2 additions and 0 deletions
|
|
@@ -266,6 +266,7 @@ def is_chinese(text):
|
|||
|
||||
|
||||
def tokenize(d, txt, eng):
|
||||
from . import rag_tokenizer
|
||||
d["content_with_weight"] = txt
|
||||
t = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", txt)
|
||||
d["content_ltks"] = rag_tokenizer.tokenize(t)
|
||||
|
|
@@ -363,6 +364,7 @@ def attach_media_context(chunks, table_context_size=0, image_context_size=0):
|
|||
Best-effort ordering: if positional info exists on any chunk, use it to
|
||||
order chunks before collecting context; otherwise keep original order.
|
||||
"""
|
||||
from . import rag_tokenizer
|
||||
if not chunks or (table_context_size <= 0 and image_context_size <= 0):
|
||||
return chunks
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue