diff --git a/agent/canvas.py b/agent/canvas.py
index 2dfeb63e6..c8b6f9e38 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import base64
 import json
 import logging
 import re
diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py
index cc679bc53..334c63645 100644
--- a/rag/nlp/__init__.py
+++ b/rag/nlp/__init__.py
@@ -266,7 +266,7 @@ def is_chinese(text):
 
 def tokenize(d, txt, eng):
     d["content_with_weight"] = txt
-    t = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", t)
+    t = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", txt)
     d["content_ltks"] = rag_tokenizer.tokenize(t)
     d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
 