Fix LLM output instability for <|> tuple delimiter

- Replace <||> with <|>
- Replace < | > with <|>
- Apply the fix in both _parse_extraction_result and extract_entities
- Handle delimiter variations
- Improve parsing reliability
2025-09-01 01:22:27 +08:00 · 2025-09-01 01:22:27 +08:00 · 5fd7682f16
commit 5fd7682f16
parent cdc4570cfe
1 changed file with 14 additions and 0 deletions
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -822,6 +822,13 @@ async def _parse_extraction_result(
    maybe_nodes = defaultdict(list)
    maybe_edges = defaultdict(list)

+    # Preventive fix: when tuple_delimiter is <|>, fix LLM output instability issues
+    if context_base["tuple_delimiter"] == "<|>":
+        # 1. Convert <||> to <|>
+        extraction_result = extraction_result.replace("<||>", "<|>")
+        # 2. Convert < | > to <|>
+        extraction_result = extraction_result.replace("< | >", "<|>")
+
    # Parse the extraction result using the same logic as in extract_entities
    records = split_string_by_multi_markers(
        extraction_result,
@@ -1729,6 +1736,13 @@ async def extract_entities(
        maybe_nodes = defaultdict(list)
        maybe_edges = defaultdict(list)

+        # Preventive fix: when tuple_delimiter is <|>, fix LLM output instability issues
+        if context_base["tuple_delimiter"] == "<|>":
+            # 1. Convert <||> to <|>
+            result = result.replace("<||>", "<|>")
+            # 2. Convert < | > to <|>
+            result = result.replace("< | >", "<|>")
+
        records = split_string_by_multi_markers(
            result,
            [context_base["record_delimiter"], context_base["completion_delimiter"]],