Fix tuple delimiter corruption handling in regex patterns

This commit is contained in:
yangdx 2025-10-17 18:43:45 +08:00
parent 46ac5dac53
commit f555824064

View file

@ -2611,9 +2611,9 @@ def fix_tuple_delimiter_corruption(
record, record,
) )
# Fix: <X|#|> -> <|#|>, <|#|Y> -> <|#|>, <X|#|Y> -> <|#|>, <||#||> -> <|#|>, <||#> -> <|#|> (one extra character outside pipes) # Fix: <X|#|> -> <|#|>, <|#|Y> -> <|#|>, <X|#|Y> -> <|#|>, <||#||> -> <|#|> (one extra character outside pipes)
record = re.sub( record = re.sub(
rf"<.?\|{escaped_delimiter_core}\|*?>", rf"<.?\|{escaped_delimiter_core}\|.?>",
tuple_delimiter, tuple_delimiter,
record, record,
) )
@ -2633,7 +2633,6 @@ def fix_tuple_delimiter_corruption(
) )
# Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >) # Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >)
record = re.sub( record = re.sub(
rf"<\|{escaped_delimiter_core}\|+(?!>)", rf"<\|{escaped_delimiter_core}\|+(?!>)",
tuple_delimiter, tuple_delimiter,
@ -2647,6 +2646,13 @@ def fix_tuple_delimiter_corruption(
record, record,
) )
# Fix: <||#> -> <|#|> (double pipe at start, missing pipe at end)
record = re.sub(
rf"<\|+{escaped_delimiter_core}>",
tuple_delimiter,
record,
)
# Fix: <|| -> <|#|> # Fix: <|| -> <|#|>
record = re.sub( record = re.sub(
r"<\|\|(?!>)", r"<\|\|(?!>)",