From a2e59dd0781348c030d289f8e80531394c8ca9fd Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 5 Jul 2025 12:06:34 +0800 Subject: [PATCH] fix: prevent empty entity names after normalization in extraction Added validation checks in entity and relationship extraction functions to filter out entities that become empty strings after normalize_extracted_info processing. This prevents empty labels from appearing in get_all_labels() results and maintains knowledge graph data integrity. --- lightrag/operate.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lightrag/operate.py b/lightrag/operate.py index 60425148..4d8264bf 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -167,6 +167,13 @@ async def _handle_single_entity_extraction( # Normalize entity name entity_name = normalize_extracted_info(entity_name, is_entity=True) + + # Check if entity name became empty after normalization + if not entity_name or not entity_name.strip(): + logger.warning( + f"Entity extraction error: entity name became empty after normalization. Original: '{record_attributes[1]}'" + ) + return None # Clean and validate entity type entity_type = clean_str(record_attributes[2]).strip('"') @@ -209,6 +216,20 @@ async def _handle_single_relationship_extraction( # Normalize source and target entity names source = normalize_extracted_info(source, is_entity=True) target = normalize_extracted_info(target, is_entity=True) + + # Check if source or target became empty after normalization + if not source or not source.strip(): + logger.warning( + f"Relationship extraction error: source entity became empty after normalization. Original: '{record_attributes[1]}'" + ) + return None + + if not target or not target.strip(): + logger.warning( + f"Relationship extraction error: target entity became empty after normalization. Original: '{record_attributes[2]}'" + ) + return None + if source == target: logger.debug( f"Relationship source and target are the same in: {record_attributes}"