Fix entity/relationship record parsing in extraction result processing

This commit is contained in:
yangdx 2025-09-14 05:35:01 +08:00
parent 4f5ad76c2c
commit b820d8d588

View file

@@ -883,23 +883,24 @@ async def _process_extraction_result(
# Fix LLM output format errors where tuple_delimiter is used to separate records instead of "\n" # Fix LLM output format errors where tuple_delimiter is used to separate records instead of "\n"
fixed_records = [] fixed_records = []
for record in records: for record in records:
record = record.strip()
if record is None:
continue
entity_records = split_string_by_multi_markers( entity_records = split_string_by_multi_markers(
record, [f"{tuple_delimiter}entity{tuple_delimiter}"] record, [f"{tuple_delimiter}entity{tuple_delimiter}"]
) )
for entity_record in entity_records: for entity_record in entity_records:
if not entity_record.startswith( if not entity_record.startswith("entity") and not entity_record.startswith(
f"entity{tuple_delimiter}" "relationship"
) and not entity_record.startswith(f"relationship{tuple_delimiter}"): ):
entity_record = f"entity{tuple_delimiter}{entity_record}" entity_record = f"entity<|{entity_record}"
entity_relation_records = split_string_by_multi_markers( entity_relation_records = split_string_by_multi_markers(
entity_record, [f"{tuple_delimiter}relationship{tuple_delimiter}"] entity_record, [f"{tuple_delimiter}relationship{tuple_delimiter}"]
) )
for entity_relation_record in entity_relation_records: for entity_relation_record in entity_relation_records:
if not entity_relation_record.startswith( if not entity_relation_record.startswith(
f"entity{tuple_delimiter}" "entity"
) and not entity_relation_record.startswith( ) and not entity_relation_record.startswith("relationship"):
f"relationship{tuple_delimiter}"
):
entity_relation_record = ( entity_relation_record = (
f"relationship{tuple_delimiter}{entity_relation_record}" f"relationship{tuple_delimiter}{entity_relation_record}"
) )