Fix tuple delimiter corruption patterns with pipes and brackets
- Handle <||S||> malformed delimiters
- Fix <||> empty pipe sequences
- Repair <|| incomplete patterns
- Process ||S|| missing brackets
- Improve delimiter normalization
This commit is contained in:
parent
43f6fcea6c
commit
8a3e2c03a9
1 changed file with 23 additions and 2 deletions
|
|
@@ -2591,6 +2591,13 @@ def fix_tuple_delimiter_corruption(
|
|||
# Escape the delimiter core for regex use
|
||||
escaped_delimiter_core = re.escape(delimiter_core)
|
||||
|
||||
# Fix: <||S||>
|
||||
record = re.sub(
|
||||
rf"<\|+{escaped_delimiter_core}\|+>",
|
||||
tuple_delimiter,
|
||||
record,
|
||||
)
|
||||
|
||||
# Fix: <|S||S|> -> <|S|>, <|S|||S|> -> <|S|>
|
||||
record = re.sub(
|
||||
rf"<\|{escaped_delimiter_core}\|+{escaped_delimiter_core}\|>",
|
||||
|
|
@@ -2605,9 +2612,9 @@ def fix_tuple_delimiter_corruption(
|
|||
record,
|
||||
)
|
||||
|
||||
# Fix: <|> -> <|S|>
|
||||
# Fix: <|> -> <|S|>, <||> -> <|S|>
|
||||
record = re.sub(
|
||||
r"<\|>",
|
||||
r"<\|+>",
|
||||
tuple_delimiter,
|
||||
record,
|
||||
)
|
||||
|
|
@@ -2640,6 +2647,13 @@ def fix_tuple_delimiter_corruption(
|
|||
record,
|
||||
)
|
||||
|
||||
# Fix: <|| -> <|S|>
|
||||
record = re.sub(
|
||||
r"<\|\|(?!>)",
|
||||
tuple_delimiter,
|
||||
record,
|
||||
)
|
||||
|
||||
# Fix: |S|> -> <|S|> (missing opening <)
|
||||
record = re.sub(
|
||||
rf"(?<!<)\|{escaped_delimiter_core}\|>",
|
||||
|
|
@@ -2654,6 +2668,13 @@ def fix_tuple_delimiter_corruption(
|
|||
record,
|
||||
)
|
||||
|
||||
# Fix: ||S|| -> <|S|> (double pipes on both sides without angle brackets)
|
||||
record = re.sub(
|
||||
rf"\|\|{escaped_delimiter_core}\|\|",
|
||||
tuple_delimiter,
|
||||
record,
|
||||
)
|
||||
|
||||
return record
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue