Fix extraction validation and delimiter comment accuracy

• Change < to != for exact length check • Require exactly 4 fields for entity validation (previously at least 4) • Require exactly 5 fields for relationship validation (previously at least 5) • Correct delimiter comment example
2025-09-12 18:13:25 +08:00 · 2025-09-12 18:13:25 +08:00 · 9a2e8be5a7
commit 9a2e8be5a7
parent 8088b7e07a
1 changed file with 3 additions and 3 deletions
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -320,7 +320,7 @@ async def _handle_single_entity_extraction(
    timestamp: int,
    file_path: str = "unknown_source",
 ):
-    if len(record_attributes) < 4 or "entity" not in record_attributes[0]:
+    if len(record_attributes) != 4 or "entity" not in record_attributes[0]:
        if len(record_attributes) > 1 and "entity" in record_attributes[0]:
            logger.warning(
                f"{chunk_key}: extraction failed! only got {len(record_attributes)} feilds on entity `{record_attributes[1]}`"
@@ -391,7 +391,7 @@ async def _handle_single_relationship_extraction(
    timestamp: int,
    file_path: str = "unknown_source",
 ):
-    if len(record_attributes) < 5 or "relationship" not in record_attributes[0]:
+    if len(record_attributes) != 5 or "relationship" not in record_attributes[0]:
        if len(record_attributes) > 1 and "relationship" in record_attributes[0]:
            logger.warning(
                f"{chunk_key}: extraction failed! only got {len(record_attributes)} fields on realtion `{record_attributes[1]}`"
@@ -885,7 +885,7 @@ async def _process_extraction_result(
            continue

        # Fix various forms of tuple_delimiter corruption from the LLM output using the dedicated function
-        delimiter_core = tuple_delimiter[2:-2]  # Extract "SEP" from "<|SEP|>"
+        delimiter_core = tuple_delimiter[2:-2]  # Extract "S" from "<|S|>"
        record = fix_tuple_delimiter_corruption(record, delimiter_core, tuple_delimiter)
        # change delimiter_core to lower case, and fix again
        delimiter_core = delimiter_core.lower()