From a2e59dd0781348c030d289f8e80531394c8ca9fd Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 5 Jul 2025 12:06:34 +0800 Subject: [PATCH 1/2] fix: prevent empty entity names after normalization in extraction Added validation checks in entity and relationship extraction functions to filter out entities that become empty strings after normalize_extracted_info processing. This prevents empty labels from appearing in get_all_labels() results and maintains knowledge graph data integrity. --- lightrag/operate.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lightrag/operate.py b/lightrag/operate.py index 60425148..4d8264bf 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -167,6 +167,13 @@ async def _handle_single_entity_extraction( # Normalize entity name entity_name = normalize_extracted_info(entity_name, is_entity=True) + + # Check if entity name became empty after normalization + if not entity_name or not entity_name.strip(): + logger.warning( + f"Entity extraction error: entity name became empty after normalization. Original: '{record_attributes[1]}'" + ) + return None # Clean and validate entity type entity_type = clean_str(record_attributes[2]).strip('"') @@ -209,6 +216,20 @@ async def _handle_single_relationship_extraction( # Normalize source and target entity names source = normalize_extracted_info(source, is_entity=True) target = normalize_extracted_info(target, is_entity=True) + + # Check if source or target became empty after normalization + if not source or not source.strip(): + logger.warning( + f"Relationship extraction error: source entity became empty after normalization. Original: '{record_attributes[1]}'" + ) + return None + + if not target or not target.strip(): + logger.warning( + f"Relationship extraction error: target entity became empty after normalization. Original: '{record_attributes[2]}'" + ) + return None + if source == target: logger.debug( f"Relationship source and target are the same in: {record_attributes}" From fe134752341cd50b47a1cdd7b81896fabc7e4ed7 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 5 Jul 2025 12:07:37 +0800 Subject: [PATCH 2/2] Fix linting --- lightrag/operate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 4d8264bf..88837435 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -167,7 +167,7 @@ async def _handle_single_entity_extraction( # Normalize entity name entity_name = normalize_extracted_info(entity_name, is_entity=True) - + # Check if entity name became empty after normalization if not entity_name or not entity_name.strip(): logger.warning( @@ -216,20 +216,20 @@ async def _handle_single_relationship_extraction( # Normalize source and target entity names source = normalize_extracted_info(source, is_entity=True) target = normalize_extracted_info(target, is_entity=True) - + # Check if source or target became empty after normalization if not source or not source.strip(): logger.warning( f"Relationship extraction error: source entity became empty after normalization. Original: '{record_attributes[1]}'" ) return None - + if not target or not target.strip(): logger.warning( f"Relationship extraction error: target entity became empty after normalization. Original: '{record_attributes[2]}'" ) return None - + if source == target: logger.debug( f"Relationship source and target are the same in: {record_attributes}"