Fix entity/relationship record parsing in extraction result processing

This commit is contained in:
yangdx 2025-09-14 05:35:01 +08:00
parent 4f5ad76c2c
commit b820d8d588

View file

@ -883,23 +883,24 @@ async def _process_extraction_result(
# Fix LLM output format errors where tuple_delimiter is used to separate records instead of "\n"
fixed_records = []
for record in records:
record = record.strip()
if record is None:
continue
entity_records = split_string_by_multi_markers(
record, [f"{tuple_delimiter}entity{tuple_delimiter}"]
)
for entity_record in entity_records:
if not entity_record.startswith(
f"entity{tuple_delimiter}"
) and not entity_record.startswith(f"relationship{tuple_delimiter}"):
entity_record = f"entity{tuple_delimiter}{entity_record}"
if not entity_record.startswith("entity") and not entity_record.startswith(
"relationship"
):
entity_record = f"entity<|{entity_record}"
entity_relation_records = split_string_by_multi_markers(
entity_record, [f"{tuple_delimiter}relationship{tuple_delimiter}"]
)
for entity_relation_record in entity_relation_records:
if not entity_relation_record.startswith(
f"entity{tuple_delimiter}"
) and not entity_relation_record.startswith(
f"relationship{tuple_delimiter}"
):
"entity"
) and not entity_relation_record.startswith("relationship"):
entity_relation_record = (
f"relationship{tuple_delimiter}{entity_relation_record}"
)