Remove space between Chinese chars and English symbols
This commit is contained in:
parent
ff65cba544
commit
17f5439952
1 changed file with 3 additions and 3 deletions
|
|
@@ -1448,9 +1448,9 @@ def normalize_extracted_info(name: str, is_entity=False) -> str:
|
|||
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
|
||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||
|
||||
# Remove spaces between Chinese and English/numbers
|
||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name)
|
||||
name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||
# Remove spaces between Chinese and English/numbers/symbols
|
||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name)
|
||||
name = re.sub(r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||
|
||||
# Remove English quotation marks from the beginning and end
|
||||
if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue