From 207af40f54943ccee47256d25156215e01d4456c Mon Sep 17 00:00:00 2001 From: Ghazi-raad Date: Wed, 26 Nov 2025 21:56:25 +0000 Subject: [PATCH 1/5] Optimize for OpenAI Prompt Caching: Restructure entity extraction prompts - Remove input_text from entity_extraction_system_prompt to enable caching - Move input_text to entity_extraction_user_prompt for per-chunk variability - Update operate.py to format system prompt once without input_text - Format user prompts with input_text for each chunk This enables OpenAI's automatic prompt caching (50% discount on cached tokens): - ~1300 token system message cached and reused for ALL chunks - Only ~150 token user message varies per chunk - Expected 45% cost reduction on prompt tokens during indexing - 2-3x faster response times from cached prompts Fixes #2355 --- lightrag/operate.py | 4 +++- lightrag/prompt.py | 10 ++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 5f824af0..521e8b03 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2832,9 +2832,11 @@ async def extract_entities( cache_keys_collector = [] # Get initial extraction + # Format system prompt once without input_text for OpenAI prompt caching entity_extraction_system_prompt = PROMPTS[ "entity_extraction_system_prompt" - ].format(**{**context_base, "input_text": content}) + ].format(**context_base) + # Format user prompts with input_text for each chunk entity_extraction_user_prompt = PROMPTS["entity_extraction_user_prompt"].format( **{**context_base, "input_text": content} ) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index bf514fe8..cfa5b76f 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -62,14 +62,16 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel ---Real Data to be Processed--- Entity_types: [{entity_types}] +""" + +PROMPTS["entity_extraction_user_prompt"] = """---Task--- +Extract entities and relationships from the following input text. + +---Input Text--- Text: ``` {input_text} ``` -""" - -PROMPTS["entity_extraction_user_prompt"] = """---Task--- -Extract entities and relationships from the input text to be processed. ---Instructions--- 1. **Strict Adherence to Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system prompt. From 56677ae466a318289eedbaadb97fe55a14129b4a Mon Sep 17 00:00:00 2001 From: Ghazi-raad Date: Wed, 26 Nov 2025 23:18:12 +0000 Subject: [PATCH 2/5] Update lightrag/prompt.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lightrag/prompt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index cfa5b76f..3bb031d4 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -65,7 +65,7 @@ Entity_types: [{entity_types}] """ PROMPTS["entity_extraction_user_prompt"] = """---Task--- -Extract entities and relationships from the following input text. +Extract entities and relationships from the input text below. ---Input Text--- Text: From 4e8e08cf4d9f09ce6e5bfb9a22849379fb0a44f6 Mon Sep 17 00:00:00 2001 From: Ghazi-raad Date: Wed, 26 Nov 2025 23:18:20 +0000 Subject: [PATCH 3/5] Update lightrag/operate.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lightrag/operate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 521e8b03..ab9d957e 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2832,7 +2832,7 @@ async def extract_entities( cache_keys_collector = [] # Get initial extraction - # Format system prompt once without input_text for OpenAI prompt caching + # Format system prompt without input_text for each chunk (enables OpenAI prompt caching across chunks) entity_extraction_system_prompt = PROMPTS[ "entity_extraction_system_prompt" ].format(**context_base) From 294f75438eb6ad99fb71dad80595b178e88495a4 Mon Sep 17 00:00:00 2001 From: yangdx Date: Thu, 11 Dec 2025 19:12:34 +0800 Subject: [PATCH 4/5] Restructure entity extraction prompt format for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Move entity_types to user prompt • Add XML-style formatting tags • Update examples with entity_types --- lightrag/prompt.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index 3bb031d4..9c61041b 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -58,17 +58,16 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel ---Examples--- {examples} - ----Real Data to be Processed--- - -Entity_types: [{entity_types}] """ PROMPTS["entity_extraction_user_prompt"] = """---Task--- Extract entities and relationships from the input text below. ----Input Text--- -Text: +---Data to be Processed--- + +[{entity_types}] + + ``` {input_text} ``` @@ -101,7 +100,10 @@ Based on the last extraction task, identify and extract any **missed or incorrec """ PROMPTS["entity_extraction_examples"] = [ - """ + """ +["Person","Creature","Organization","Location","Event","Concept","Method","Content","Data","Artifact","NaturalObject"] + + ``` while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order. @@ -126,7 +128,10 @@ relation{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}rever {completion_delimiter} """, - """ + """ +["Person","Creature","Organization","Location","Event","Concept","Method","Content","Data","Artifact","NaturalObject"] + + ``` Stock markets faced a sharp downturn today as tech giants saw significant declines, with the global tech index dropping by 3.4% in midday trading. Analysts attribute the selloff to investor concerns over rising interest rates and regulatory uncertainty. @@ -153,7 +158,10 @@ relation{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Mar {completion_delimiter} """, - """ + """ +["Person","Creature","Organization","Location","Event","Concept","Method","Content","Data","Artifact","NaturalObject"] + + ``` At the World Athletics Championship in Tokyo, Noah Carter broke the 100m sprint record using cutting-edge carbon-fiber spikes. ``` From 834778eb0148f7ded575014ecbcc86cdf4592c5f Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 12 Dec 2025 06:12:47 +0800 Subject: [PATCH 5/5] Reorganize entity extraction prompt for better clarity - Move instructions before data section - Update task description wording --- lightrag/prompt.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index 9c61041b..6230197b 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -61,7 +61,13 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel """ PROMPTS["entity_extraction_user_prompt"] = """---Task--- -Extract entities and relationships from the input text below. +Extract entities and relationships from the input text in Data to be Processed below. + +---Instructions--- +1. **Strict Adherence to Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system prompt. +2. **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list. +3. **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant entities and relationships have been extracted and presented. +4. **Output Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated. ---Data to be Processed--- @@ -72,12 +78,6 @@ Extract entities and relationships from the input text below. {input_text} ``` ----Instructions--- -1. **Strict Adherence to Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system prompt. -2. **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list. -3. **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant entities and relationships have been extracted and presented. -4. **Output Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated. - """