Import rag_tokenizer inside the function to fix a ruff check issue

This commit is contained in:
Ling Qin 2025-12-02 14:28:49 +08:00
parent 44ed79a64b
commit e147fb761e

View file

@ -266,6 +266,7 @@ def is_chinese(text):
def tokenize(d, txt, eng):
from . import rag_tokenizer
d["content_with_weight"] = txt
t = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", txt)
d["content_ltks"] = rag_tokenizer.tokenize(t)
@ -363,6 +364,7 @@ def attach_media_context(chunks, table_context_size=0, image_context_size=0):
Best-effort ordering: if positional info exists on any chunk, use it to
order chunks before collecting context; otherwise keep original order.
"""
from . import rag_tokenizer
if not chunks or (table_context_size <= 0 and image_context_size <= 0):
return chunks