From 8a3e2c03a927cdd3afa7041f5189ecb398b13c46 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Fri, 12 Sep 2025 17:45:32 +0800
Subject: [PATCH] Fix tuple delimiter corruption patterns with pipes and brackets

- Handle <||S||> malformed delimiters
- Fix <||> empty pipe sequences
- Repair <|| incomplete patterns
- Process ||S|| missing brackets
- Improve delimiter normalization
---
 lightrag/utils.py | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/lightrag/utils.py b/lightrag/utils.py
index 07c03e43..163fb013 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -2591,6 +2591,13 @@ def fix_tuple_delimiter_corruption(
     # Escape the delimiter core for regex use
     escaped_delimiter_core = re.escape(delimiter_core)
 
+    # Fix: <||S||>
+    record = re.sub(
+        rf"<\|+{escaped_delimiter_core}\|+>",
+        tuple_delimiter,
+        record,
+    )
+
     # Fix: <|S||S|> -> <|S|>, <|S|||S|> -> <|S|>
     record = re.sub(
         rf"<\|{escaped_delimiter_core}\|+{escaped_delimiter_core}\|>",
@@ -2605,9 +2612,9 @@ def fix_tuple_delimiter_corruption(
         record,
     )
 
-    # Fix: <|> -> <|S|>
+    # Fix: <|> -> <|S|>, <||> -> <|S|>
     record = re.sub(
-        r"<\|>",
+        r"<\|+>",
         tuple_delimiter,
         record,
     )
@@ -2640,6 +2647,13 @@ def fix_tuple_delimiter_corruption(
         record,
     )
 
+    # Fix: <|| -> <|S|>
+    record = re.sub(
+        r"<\|\|(?!>)",
+        tuple_delimiter,
+        record,
+    )
+
     # Fix: |S|> -> <|S|> (missing opening <)
     record = re.sub(
         rf"(?<!<)\|{escaped_delimiter_core}\|>",
@@ -2654,6 +2668,13 @@ def fix_tuple_delimiter_corruption(
         record,
     )
 
+    # Fix: ||S|| -> <|S|> (double pipes on both sides without angle brackets)
+    record = re.sub(
+        rf"\|\|{escaped_delimiter_core}\|\|",
+        tuple_delimiter,
+        record,
+    )
+
     return record
 
 