Fix LLM output instability for <|> tuple delimiter

- Replace <||> with <|>
- Replace < | > with <|>
- Apply fix in both _parse_extraction_result and extract_entities
- Handle delimiter variations
- Improve parsing reliability
This commit is contained in:
yangdx 2025-09-01 01:22:27 +08:00
parent cdc4570cfe
commit 5fd7682f16

View file

@ -822,6 +822,13 @@ async def _parse_extraction_result(
maybe_nodes = defaultdict(list)
maybe_edges = defaultdict(list)
# Preventive fix: when tuple_delimiter is <|>, fix LLM output instability issues
if context_base["tuple_delimiter"] == "<|>":
# 1. Convert <||> to <|>
extraction_result = extraction_result.replace("<||>", "<|>")
# 2. Convert < | > to <|>
extraction_result = extraction_result.replace("< | >", "<|>")
# Parse the extraction result using the same logic as in extract_entities
records = split_string_by_multi_markers(
extraction_result,
@ -1729,6 +1736,13 @@ async def extract_entities(
maybe_nodes = defaultdict(list)
maybe_edges = defaultdict(list)
# Preventive fix: when tuple_delimiter is <|>, fix LLM output instability issues
if context_base["tuple_delimiter"] == "<|>":
# 1. Convert <||> to <|>
result = result.replace("<||>", "<|>")
# 2. Convert < | > to <|>
result = result.replace("< | >", "<|>")
records = split_string_by_multi_markers(
result,
[context_base["record_delimiter"], context_base["completion_delimiter"]],