diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 47a044669..d6c23217b 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -266,6 +266,7 @@ def is_chinese(text): def tokenize(d, txt, eng): + from . import rag_tokenizer d["content_with_weight"] = txt t = re.sub(r"]{0,12})?>", " ", txt) d["content_ltks"] = rag_tokenizer.tokenize(t) @@ -363,6 +364,7 @@ def attach_media_context(chunks, table_context_size=0, image_context_size=0): Best-effort ordering: if positional info exists on any chunk, use it to order chunks before collecting context; otherwise keep original order. """ + from . import rag_tokenizer if not chunks or (table_context_size <= 0 and image_context_size <= 0): return chunks