Import rag_tokenizer inside functions to fix ruff check issue

2025-12-02 14:28:49 +08:00 · 2025-12-02 14:28:49 +08:00 · e147fb761e
commit e147fb761e
parent 44ed79a64b
1 changed file with 2 additions and 0 deletions
--- a/rag/nlp/__init__.py
+++ b/rag/nlp/__init__.py
@@ -266,6 +266,7 @@ def is_chinese(text):


 def tokenize(d, txt, eng):
+    from . import rag_tokenizer
    d["content_with_weight"] = txt
    t = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", txt)
    d["content_ltks"] = rag_tokenizer.tokenize(t)
@@ -363,6 +364,7 @@ def attach_media_context(chunks, table_context_size=0, image_context_size=0):
    Best-effort ordering: if positional info exists on any chunk, use it to
    order chunks before collecting context; otherwise keep original order.
    """
+    from . import rag_tokenizer
    if not chunks or (table_context_size <= 0 and image_context_size <= 0):
        return chunks