Fix tuple delimiter corruption patterns and add missing edge cases
This commit is contained in:
parent
8660bf34e4
commit
c07bcbff44
1 changed file with 16 additions and 2 deletions
|
|
@@ -2591,6 +2591,13 @@ def fix_tuple_delimiter_corruption(
|
||||||
# Escape the delimiter core for regex use
|
# Escape the delimiter core for regex use
|
||||||
escaped_delimiter_core = re.escape(delimiter_core)
|
escaped_delimiter_core = re.escape(delimiter_core)
|
||||||
|
|
||||||
|
# Fix: <|SEP||SEP|> -> <|SEP|>, <|SEP|||SEP|> -> <|SEP|>
|
||||||
|
record = re.sub(
|
||||||
|
rf"<\|{escaped_delimiter_core}\|+{escaped_delimiter_core}\|>",
|
||||||
|
tuple_delimiter,
|
||||||
|
record,
|
||||||
|
)
|
||||||
|
|
||||||
# Fix: <X|SEP|> -> <|SEP|>, <|SEP|Y> -> <|SEP|>, <X|SEP|Y> -> <|SEP|> (at most one extra character on each side, outside the pipes)
|
# Fix: <X|SEP|> -> <|SEP|>, <|SEP|Y> -> <|SEP|>, <X|SEP|Y> -> <|SEP|> (at most one extra character on each side, outside the pipes)
|
||||||
record = re.sub(
|
record = re.sub(
|
||||||
rf"<.?\|{escaped_delimiter_core}\|.?>",
|
rf"<.?\|{escaped_delimiter_core}\|.?>",
|
||||||
|
|
@@ -2612,6 +2619,13 @@ def fix_tuple_delimiter_corruption(
|
||||||
record,
|
record,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Fix: <|SEP| -> <|SEP|> (missing closing >)
|
||||||
|
record = re.sub(
|
||||||
|
rf"<\|{escaped_delimiter_core}\|(?!>)",
|
||||||
|
tuple_delimiter,
|
||||||
|
record,
|
||||||
|
)
|
||||||
|
|
||||||
# Fix: |SEP|> -> <|SEP|> (missing opening <)
|
# Fix: |SEP|> -> <|SEP|> (missing opening <)
|
||||||
record = re.sub(
|
record = re.sub(
|
||||||
rf"(?<!<)\|{escaped_delimiter_core}\|>",
|
rf"(?<!<)\|{escaped_delimiter_core}\|>",
|
||||||
|
|
@@ -2619,9 +2633,9 @@ def fix_tuple_delimiter_corruption(
|
||||||
record,
|
record,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fix: <|SEP| -> <|SEP|> (missing closing >)
|
# Fix: <|SEP|>| -> <|SEP|> (removes the stray trailing pipe left after <|SEP|| is repaired to <|SEP|>|)
|
||||||
record = re.sub(
|
record = re.sub(
|
||||||
rf"<\|{escaped_delimiter_core}\|(?!>)",
|
rf"<\|{escaped_delimiter_core}\|>\|",
|
||||||
tuple_delimiter,
|
tuple_delimiter,
|
||||||
record,
|
record,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue