From 4de1473875e8f3288cf83c7171908da048ea9977 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 14 Sep 2025 13:45:59 +0800 Subject: [PATCH] Improve entity extraction prompts and error message formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Fix typo in error log message • Clarify format requirements in prompts • Make extraction instructions clearer • Improve user prompt consistency --- lightrag/operate.py | 2 +- lightrag/prompt.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 35b9404c..4cdc1c19 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -324,7 +324,7 @@ async def _handle_single_entity_extraction( if len(record_attributes) != 4 or "entity" not in record_attributes[0]: if len(record_attributes) > 1 and "entity" in record_attributes[0]: logger.warning( - f"{chunk_key}: LLM output format error; found {len(record_attributes)}/4 feilds on ENTITY `{record_attributes[1]}` of type {record_attributes[2] if len(record_attributes) > 2 else 'N/A'}" + f"{chunk_key}: LLM output format error; found {len(record_attributes)}/4 fields on ENTITY `{record_attributes[1]}` @ `{record_attributes[2] if len(record_attributes) > 2 else 'N/A'}`" ) return None diff --git a/lightrag/prompt.py b/lightrag/prompt.py index 31318b40..b3b313c3 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -74,7 +74,7 @@ PROMPTS["entity_extraction_user_prompt"] = """---Task--- Extract entities and relationships from the input text to be processed. ---Instructions--- -1. Output each entity and relationship on a single line; use `{tuple_delimiter}` as the field separator within each extracted item. +1. Adhere strictly to the format requirements for entity and relationship lists as specified in the system prompts. 2. Output `{completion_delimiter}` only after all relevant entities and relationships have been extracted. 3. Ensure the output language is {language}. 
Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated. @@ -85,7 +85,7 @@ PROMPTS["entity_continue_extraction_user_prompt"] = """---Task--- Identify any missed entities or relationships from the input text to be processed based on the last extraction task. ---Instructions--- -1. Output entities and relationships in the same format as the previous extraction task. +1. Adhere strictly to the format requirements for entity and relationship lists as specified in the system prompts. 2. Do not include entities and relationships that were correctly extracted in the last extraction task. 3. If an entity or relationship output was truncated or had missing fields in the last extraction task, please re-output it in the correct format. 4. Output each entity and relationship on a single line; use `{tuple_delimiter}` as the field separator within each extracted item.