Fix LLM output format errors in extraction result processing
- Handle tuple_delimiter as record separator
- Add format validation and correction
- Add warning for format errors
This commit is contained in:
parent
419f4f0268
commit
cddd81a86c
1 changed files with 23 additions and 0 deletions
|
|
@ -874,12 +874,35 @@ async def _process_extraction_result(
|
||||||
f"{chunk_key}: Complete delimiter can not be found in extraction result"
|
f"{chunk_key}: Complete delimiter can not be found in extraction result"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Split LLM output result into records by "\n"
|
||||||
records = split_string_by_multi_markers(
|
records = split_string_by_multi_markers(
|
||||||
result,
|
result,
|
||||||
["\n", completion_delimiter],
|
["\n", completion_delimiter],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Fix LLM output format errors where tuple_delimiter is used to separate records instead of "\n"
|
||||||
|
fixed_records = []
|
||||||
for record in records:
|
for record in records:
|
||||||
|
entity_records = split_string_by_multi_markers(
|
||||||
|
record, [f"{tuple_delimiter}entity{tuple_delimiter}"]
|
||||||
|
)
|
||||||
|
for entity_record in entity_records:
|
||||||
|
if not entity_record.startswith(f"entity{tuple_delimiter}") and not entity_record.startswith(f"relationship{tuple_delimiter}"):
|
||||||
|
entity_record = f"entity{tuple_delimiter}{entity_record}"
|
||||||
|
entity_relation_records = split_string_by_multi_markers(
|
||||||
|
entity_record, [f"{tuple_delimiter}relationship{tuple_delimiter}"]
|
||||||
|
)
|
||||||
|
for entity_relation_record in entity_relation_records:
|
||||||
|
if not entity_relation_record.startswith(f"entity{tuple_delimiter}") and not entity_relation_record.startswith(f"relationship{tuple_delimiter}"):
|
||||||
|
entity_relation_record = f"relationship{tuple_delimiter}{entity_relation_record}"
|
||||||
|
fixed_records = fixed_records + [entity_relation_record]
|
||||||
|
|
||||||
|
if len(fixed_records) != len(records):
|
||||||
|
logger.warning(
|
||||||
|
f"{chunk_key}: LLM output format error; find LLM use {tuple_delimiter} as record seperators instead new-line"
|
||||||
|
)
|
||||||
|
|
||||||
|
for record in fixed_records:
|
||||||
record = record.strip()
|
record = record.strip()
|
||||||
if record is None:
|
if record is None:
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue