Fix tuple delimiter corruption handling in regex patterns

This commit is contained in:
yangdx 2025-10-17 18:43:45 +08:00
parent 46ac5dac53
commit f555824064

View file

@ -2611,9 +2611,9 @@ def fix_tuple_delimiter_corruption(
record,
)
# Fix: <X|#|> -> <|#|>, <|#|Y> -> <|#|>, <X|#|Y> -> <|#|>, <||#||> -> <|#|>, <||#> -> <|#|> (one extra character outside the pipes)
# Fix: <X|#|> -> <|#|>, <|#|Y> -> <|#|>, <X|#|Y> -> <|#|>, <||#||> -> <|#|> (at most one extra character on each side, outside the pipes)
record = re.sub(
rf"<.?\|{escaped_delimiter_core}\|*?>",
rf"<.?\|{escaped_delimiter_core}\|.?>",
tuple_delimiter,
record,
)
@ -2633,7 +2633,6 @@ def fix_tuple_delimiter_corruption(
)
# Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >)
record = re.sub(
rf"<\|{escaped_delimiter_core}\|+(?!>)",
tuple_delimiter,
@ -2647,6 +2646,13 @@ def fix_tuple_delimiter_corruption(
record,
)
# Fix: <||#> -> <|#|> (double pipe at start, missing pipe at end)
record = re.sub(
rf"<\|+{escaped_delimiter_core}>",
tuple_delimiter,
record,
)
# Fix: <|| -> <|#|>
record = re.sub(
r"<\|\|(?!>)",