diff --git a/lightrag/utils.py b/lightrag/utils.py index 83a3c394..94f1ff27 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -2611,9 +2611,9 @@ def fix_tuple_delimiter_corruption( record, ) - # Fix: -> <|#|>, <|#|Y> -> <|#|>, -> <|#|>, <||#||> -> <|#|>, <||#> -> <|#|> (one extra characters outside pipes) + # Fix: -> <|#|>, <|#|Y> -> <|#|>, -> <|#|>, <||#||> -> <|#|> (one extra characters outside pipes) record = re.sub( - rf"<.?\|{escaped_delimiter_core}\|*?>", + rf"<.?\|{escaped_delimiter_core}\|.?>", tuple_delimiter, record, ) @@ -2633,7 +2633,6 @@ def fix_tuple_delimiter_corruption( ) # Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >) - record = re.sub( rf"<\|{escaped_delimiter_core}\|+(?!>)", tuple_delimiter, @@ -2647,6 +2646,13 @@ def fix_tuple_delimiter_corruption( record, ) + # Fix: <||#> -> <|#|> (double pipe at start, missing pipe at end) + record = re.sub( + rf"<\|+{escaped_delimiter_core}>", + tuple_delimiter, + record, + ) + # Fix: <|| -> <|#|> record = re.sub( r"<\|\|(?!>)",