diff --git a/lightrag/utils.py b/lightrag/utils.py index 07c03e43..163fb013 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -2591,6 +2591,13 @@ def fix_tuple_delimiter_corruption( # Escape the delimiter core for regex use escaped_delimiter_core = re.escape(delimiter_core) + # Fix: <||S||> + record = re.sub( + rf"<\|+{escaped_delimiter_core}\|+>", + tuple_delimiter, + record, + ) + # Fix: <|S||S|> -> <|S|>, <|S|||S|> -> <|S|> record = re.sub( rf"<\|{escaped_delimiter_core}\|+{escaped_delimiter_core}\|>", @@ -2605,9 +2612,9 @@ def fix_tuple_delimiter_corruption( record, ) - # Fix: <|> -> <|S|> + # Fix: <|> -> <|S|>, <||> -> <|S|> record = re.sub( - r"<\|>", + r"<\|+>", tuple_delimiter, record, ) @@ -2640,6 +2647,13 @@ def fix_tuple_delimiter_corruption( record, ) + # Fix: <|| -> <|S|> + record = re.sub( + r"<\|\|(?!>)", + tuple_delimiter, + record, + ) + # Fix: |S|> -> <|S|> (missing opening <) record = re.sub( rf"(?", @@ -2654,6 +2668,13 @@ def fix_tuple_delimiter_corruption( record, ) + # Fix: ||S|| -> <|S|> (double pipes on both sides without angle brackets) + record = re.sub( + rf"\|\|{escaped_delimiter_core}\|\|", + tuple_delimiter, + record, + ) + return record