Fix tuple delimiter regex patterns and add debug logging
- Add debug logs for malformed records - Fix regex for consecutive delimiters - Handle missing closing brackets
This commit is contained in:
parent
3792f86de3
commit
4dafec8884
2 changed files with 9 additions and 6 deletions
|
|
@ -325,6 +325,7 @@ async def _handle_single_entity_extraction(
|
|||
logger.warning(
|
||||
f"{chunk_key}: LLM output format error; found {len(record_attributes)}/4 feilds on ENTITY `{record_attributes[1]}` @ `{record_attributes[2] if len(record_attributes) > 2 else 'N/A'}`"
|
||||
)
|
||||
logger.debug(record_attributes)
|
||||
return None
|
||||
|
||||
try:
|
||||
|
|
@ -398,6 +399,7 @@ async def _handle_single_relationship_extraction(
|
|||
logger.warning(
|
||||
f"{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) >2 else 'N/A'}`"
|
||||
)
|
||||
logger.debug(record_attributes)
|
||||
return None
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -2583,9 +2583,9 @@ def fix_tuple_delimiter_corruption(
|
|||
# Escape the delimiter core for regex use
|
||||
escaped_delimiter_core = re.escape(delimiter_core)
|
||||
|
||||
# Fix: <|#||#|> -> <|#|>, <|#|||#|> -> <|#|>
|
||||
# Fix: <|##|> -> <|#|>, <|#||#|> -> <|#|>, <|#|||#|> -> <|#|>
|
||||
record = re.sub(
|
||||
rf"<\|{escaped_delimiter_core}\|+{escaped_delimiter_core}\|>",
|
||||
rf"<\|{escaped_delimiter_core}\|*?{escaped_delimiter_core}\|>",
|
||||
tuple_delimiter,
|
||||
record,
|
||||
)
|
||||
|
|
@ -2604,9 +2604,9 @@ def fix_tuple_delimiter_corruption(
|
|||
record,
|
||||
)
|
||||
|
||||
# Fix: <X|#|> -> <|#|>, <|#|Y> -> <|#|>, <X|#|Y> -> <|#|>, <||#||> -> <|#|> (one extra character outside pipes)
|
||||
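# Fix: <X|#|> -> <|#|>, <|#|Y> -> <|#|>, <X|#|Y> -> <|#|>, <||#||> -> <|#|>, <||#> -> <|#|> (one extra character outside pipes)
# NOTE(review): a pattern ending in `\|*?>` accepts only extra pipes before `>`, so <|#|Y> and <X|#|Y> would not be matched by it; confirm those cases are handled elsewhere.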
|
||||
record = re.sub(
|
||||
rf"<.?\|{escaped_delimiter_core}\|.?>",
|
||||
rf"<.?\|{escaped_delimiter_core}\|*?>",
|
||||
tuple_delimiter,
|
||||
record,
|
||||
)
|
||||
|
|
@ -2625,9 +2625,10 @@ def fix_tuple_delimiter_corruption(
|
|||
record,
|
||||
)
|
||||
|
||||
# Fix: <|#| -> <|#|> (missing closing >)
|
||||
# Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >)
|
||||
#
|
||||
record = re.sub(
|
||||
rf"<\|{escaped_delimiter_core}\|(?!>)",
|
||||
rf"<\|{escaped_delimiter_core}\|+(?!>)",
|
||||
tuple_delimiter,
|
||||
record,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue