From fd48afdb0016fae126bf1ad58cc86fc7f9b77ca2 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 14 Sep 2025 11:43:35 +0800 Subject: [PATCH 1/3] Use "relation" instead of "relationship" in extration prompt, and support both format for safty --- lightrag/operate.py | 23 +++++++++++++++-------- lightrag/prompt.py | 30 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 61a095ac..6a8d446a 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -323,7 +323,7 @@ async def _handle_single_entity_extraction( if len(record_attributes) != 4 or "entity" not in record_attributes[0]: if len(record_attributes) > 1 and "entity" in record_attributes[0]: logger.warning( - f"{chunk_key}: LLM output format error; found {len(record_attributes)}/4 feilds on ENTITY `{record_attributes[1]}`" + f"{chunk_key}: LLM output format error; found {len(record_attributes)}/4 feilds on ENTITY `{record_attributes[1]}` of type {record_attributes[2] if len(record_attributes) > 2 else 'N/A'}" ) return None @@ -391,10 +391,12 @@ async def _handle_single_relationship_extraction( timestamp: int, file_path: str = "unknown_source", ): - if len(record_attributes) != 5 or "relationship" not in record_attributes[0]: - if len(record_attributes) > 1 and "relationship" in record_attributes[0]: + if ( + len(record_attributes) != 5 or "relation" not in record_attributes[0] + ): # treat "relationship" and "relation" interchangeable + if len(record_attributes) > 1 and "relation" in record_attributes[0]: logger.warning( - f"{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`" + f"{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) >2 else 'N/A'}`" ) return None @@ -891,18 +893,23 @@ async def _process_extraction_result( ) for entity_record in entity_records: if not entity_record.startswith("entity") and not entity_record.startswith( - "relationship" + "relation" ): entity_record = f"entity<|{entity_record}" entity_relation_records = split_string_by_multi_markers( - entity_record, [f"{tuple_delimiter}relationship{tuple_delimiter}"] + # treat "relationship" and "relation" interchangeable + entity_record, + [ + f"{tuple_delimiter}relationship{tuple_delimiter}", + f"{tuple_delimiter}relation{tuple_delimiter}", + ], ) for entity_relation_record in entity_relation_records: if not entity_relation_record.startswith( "entity" - ) and not entity_relation_record.startswith("relationship"): + ) and not entity_relation_record.startswith("relation"): entity_relation_record = ( - f"relationship{tuple_delimiter}{entity_relation_record}" + f"relation{tuple_delimiter}{entity_relation_record}" ) fixed_records = fixed_records + [entity_relation_record] diff --git a/lightrag/prompt.py b/lightrag/prompt.py index b36e8fb2..31318b40 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -32,8 +32,8 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel * `target_entity`: The name of the target entity. Ensure **consistent naming** with entity extraction. Capitalize the first letter of each significant word (title case) if the name is case-insensitive. * `relationship_keywords`: One or more high-level keywords summarizing the overarching nature, concepts, or themes of the relationship. Multiple keywords within this field must be separated by a comma `,`. **DO NOT use `{tuple_delimiter}` for separating multiple keywords within this field.** * `relationship_description`: A concise explanation of the nature of the relationship between the source and target entities, providing a clear rationale for their connection. - * **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relationship`. - * Format: `relationship{tuple_delimiter}source_entity{tuple_delimiter}target_entity{tuple_delimiter}relationship_keywords{tuple_delimiter}relationship_description` + * **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relation`. + * Format: `relation{tuple_delimiter}source_entity{tuple_delimiter}target_entity{tuple_delimiter}relationship_keywords{tuple_delimiter}relationship_description` 3. **Delimiter Usage Protocol:** * The `{tuple_delimiter}` is a complete, atomic marker and **must not be filled with content**. It serves strictly as a field separator. @@ -113,11 +113,11 @@ entity{tuple_delimiter}Taylor{tuple_delimiter}person{tuple_delimiter}Taylor is p entity{tuple_delimiter}Jordan{tuple_delimiter}person{tuple_delimiter}Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device. entity{tuple_delimiter}Cruz{tuple_delimiter}person{tuple_delimiter}Cruz is associated with a vision of control and order, influencing the dynamics among other characters. entity{tuple_delimiter}The Device{tuple_delimiter}equiment{tuple_delimiter}The Device is central to the story, with potential game-changing implications, and is revered by Taylor. -relationship{tuple_delimiter}Alex{tuple_delimiter}Taylor{tuple_delimiter}power dynamics, observation{tuple_delimiter}Alex observes Taylor's authoritarian behavior and notes changes in Taylor's attitude toward the device. -relationship{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision.) -relationship{tuple_delimiter}Taylor{tuple_delimiter}Jordan{tuple_delimiter}conflict resolution, mutual respect{tuple_delimiter}Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce. -relationship{tuple_delimiter}Jordan{tuple_delimiter}Cruz{tuple_delimiter}ideological conflict, rebellion{tuple_delimiter}Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order. -relationship{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}reverence, technological significance{tuple_delimiter}Taylor shows reverence towards the device, indicating its importance and potential impact. +relation{tuple_delimiter}Alex{tuple_delimiter}Taylor{tuple_delimiter}power dynamics, observation{tuple_delimiter}Alex observes Taylor's authoritarian behavior and notes changes in Taylor's attitude toward the device. +relation{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision.) +relation{tuple_delimiter}Taylor{tuple_delimiter}Jordan{tuple_delimiter}conflict resolution, mutual respect{tuple_delimiter}Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce. +relation{tuple_delimiter}Jordan{tuple_delimiter}Cruz{tuple_delimiter}ideological conflict, rebellion{tuple_delimiter}Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order. +relation{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}reverence, technological significance{tuple_delimiter}Taylor shows reverence towards the device, indicating its importance and potential impact. {completion_delimiter} """, @@ -141,10 +141,10 @@ entity{tuple_delimiter}Crude Oil{tuple_delimiter}product{tuple_delimiter}Crude o entity{tuple_delimiter}Market Selloff{tuple_delimiter}category{tuple_delimiter}Market selloff refers to the significant decline in stock values due to investor concerns over interest rates and regulations. entity{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}category{tuple_delimiter}The Federal Reserve's upcoming policy announcement is expected to impact investor confidence and market stability. entity{tuple_delimiter}3.4% Decline{tuple_delimiter}category{tuple_delimiter}The Global Tech Index experienced a 3.4% decline in midday trading. -relationship{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market performance, investor sentiment{tuple_delimiter}The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns. -relationship{tuple_delimiter}Nexon Technologies{tuple_delimiter}Global Tech Index{tuple_delimiter}company impact, index movement{tuple_delimiter}Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index. -relationship{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}market reaction, safe-haven investment{tuple_delimiter}Gold prices rose as investors sought safe-haven assets during the market selloff. -relationship{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Market Selloff{tuple_delimiter}interest rate impact, financial regulation{tuple_delimiter}Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff. +relation{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market performance, investor sentiment{tuple_delimiter}The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns. +relation{tuple_delimiter}Nexon Technologies{tuple_delimiter}Global Tech Index{tuple_delimiter}company impact, index movement{tuple_delimiter}Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index. +relation{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}market reaction, safe-haven investment{tuple_delimiter}Gold prices rose as investors sought safe-haven assets during the market selloff. +relation{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Market Selloff{tuple_delimiter}interest rate impact, financial regulation{tuple_delimiter}Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff. {completion_delimiter} """, @@ -160,10 +160,10 @@ entity{tuple_delimiter}Noah Carter{tuple_delimiter}person{tuple_delimiter}Noah C entity{tuple_delimiter}100m Sprint Record{tuple_delimiter}category{tuple_delimiter}The 100m sprint record is a benchmark in athletics, recently broken by Noah Carter. entity{tuple_delimiter}Carbon-Fiber Spikes{tuple_delimiter}equipment{tuple_delimiter}Carbon-fiber spikes are advanced sprinting shoes that provide enhanced speed and traction. entity{tuple_delimiter}World Athletics Federation{tuple_delimiter}organization{tuple_delimiter}The World Athletics Federation is the governing body overseeing the World Athletics Championship and record validations. -relationship{tuple_delimiter}World Athletics Championship{tuple_delimiter}Tokyo{tuple_delimiter}event location, international competition{tuple_delimiter}The World Athletics Championship is being hosted in Tokyo. -relationship{tuple_delimiter}Noah Carter{tuple_delimiter}100m Sprint Record{tuple_delimiter}athlete achievement, record-breaking{tuple_delimiter}Noah Carter set a new 100m sprint record at the championship. -relationship{tuple_delimiter}Noah Carter{tuple_delimiter}Carbon-Fiber Spikes{tuple_delimiter}athletic equipment, performance boost{tuple_delimiter}Noah Carter used carbon-fiber spikes to enhance performance during the race. -relationship{tuple_delimiter}Noah Carter{tuple_delimiter}World Athletics Championship{tuple_delimiter}athlete participation, competition{tuple_delimiter}Noah Carter is competing at the World Athletics Championship. +relation{tuple_delimiter}World Athletics Championship{tuple_delimiter}Tokyo{tuple_delimiter}event location, international competition{tuple_delimiter}The World Athletics Championship is being hosted in Tokyo. +relation{tuple_delimiter}Noah Carter{tuple_delimiter}100m Sprint Record{tuple_delimiter}athlete achievement, record-breaking{tuple_delimiter}Noah Carter set a new 100m sprint record at the championship. +relation{tuple_delimiter}Noah Carter{tuple_delimiter}Carbon-Fiber Spikes{tuple_delimiter}athletic equipment, performance boost{tuple_delimiter}Noah Carter used carbon-fiber spikes to enhance performance during the race. +relation{tuple_delimiter}Noah Carter{tuple_delimiter}World Athletics Championship{tuple_delimiter}athlete participation, competition{tuple_delimiter}Noah Carter is competing at the World Athletics Championship. {completion_delimiter} """, From ff705a23230a2b272f074b6b9c41d97e73e145ac Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 14 Sep 2025 11:44:21 +0800 Subject: [PATCH 2/3] Fix tuple delimiter corruption when missing closing bracket, Handle <|#: -> <|#|> pattern --- lightrag/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lightrag/utils.py b/lightrag/utils.py index 4e10c393..4eb54571 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -2652,6 +2652,13 @@ def fix_tuple_delimiter_corruption( record, ) + # Fix <|#: -> <|#|> (missing closing >) + record = re.sub( + rf"<\|{escaped_delimiter_core}:(?!>)", + tuple_delimiter, + record, + ) + # Fix: <|| -> <|#|> record = re.sub( r"<\|\|(?!>)", From 619553021e4972a4292175069a2be2f4f6650714 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 14 Sep 2025 12:23:48 +0800 Subject: [PATCH 3/3] Fix delimiter processing and optimize case-sensitive handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Fix completion_delimiter reference bug • Add case check before lowercase conversion • Improve delimiter corruption handling • Optimize redundant processing logic --- lightrag/operate.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 6a8d446a..b87ac37f 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -879,7 +879,7 @@ async def _process_extraction_result( # Split LLL output result to records by "\n" records = split_string_by_multi_markers( result, - ["\n", completion_delimiter], + ["\n", completion_delimiter, completion_delimiter.lower()], ) # Fix LLM output format error which use tuple_delimiter to seperate record instead of "\n" @@ -926,9 +926,12 @@ async def _process_extraction_result( # Fix various forms of tuple_delimiter corruption from the LLM output using the dedicated function delimiter_core = tuple_delimiter[2:-2] # Extract "#" from "<|#|>" record = fix_tuple_delimiter_corruption(record, delimiter_core, tuple_delimiter) - # change delimiter_core to lower case, and fix again - delimiter_core = delimiter_core.lower() - record = fix_tuple_delimiter_corruption(record, delimiter_core, tuple_delimiter) + if delimiter_core != delimiter_core.lower(): + # change delimiter_core to lower case, and fix again + delimiter_core = delimiter_core.lower() + record = fix_tuple_delimiter_corruption( + record, delimiter_core, tuple_delimiter + ) record_attributes = split_string_by_multi_markers(record, [tuple_delimiter])