From 9a2e8be5a75dfefc93e894e503818bb042becfe1 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 12 Sep 2025 18:13:25 +0800 Subject: [PATCH] Fix extraction validation and delimiter comment accuracy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Change < to != for exact length check • Require exactly 4 fields for entity records (was: at least 4) • Require exactly 5 fields for relationship records (was: at least 5) • Correct delimiter comment example --- lightrag/operate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index ec97a461..bdc5bbd2 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -320,7 +320,7 @@ async def _handle_single_entity_extraction( timestamp: int, file_path: str = "unknown_source", ): - if len(record_attributes) < 4 or "entity" not in record_attributes[0]: + if len(record_attributes) != 4 or "entity" not in record_attributes[0]: if len(record_attributes) > 1 and "entity" in record_attributes[0]: logger.warning( f"{chunk_key}: extraction failed! only got {len(record_attributes)} feilds on entity `{record_attributes[1]}`" @@ -391,7 +391,7 @@ async def _handle_single_relationship_extraction( timestamp: int, file_path: str = "unknown_source", ): - if len(record_attributes) < 5 or "relationship" not in record_attributes[0]: + if len(record_attributes) != 5 or "relationship" not in record_attributes[0]: if len(record_attributes) > 1 and "relationship" in record_attributes[0]: logger.warning( f"{chunk_key}: extraction failed! 
only got {len(record_attributes)} fields on realtion `{record_attributes[1]}`" @@ -885,7 +885,7 @@ async def _process_extraction_result( continue # Fix various forms of tuple_delimiter corruption from the LLM output using the dedicated function - delimiter_core = tuple_delimiter[2:-2] # Extract "SEP" from "<|SEP|>" + delimiter_core = tuple_delimiter[2:-2] # Extract "S" from "<|S|>" record = fix_tuple_delimiter_corruption(record, delimiter_core, tuple_delimiter) # change delimiter_core to lower case, and fix again delimiter_core = delimiter_core.lower()