From 27f1eef616e7db78ba061c9a783d6fc7376cfb4e Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Sun, 14 Sep 2025 18:26:41 +0800
Subject: [PATCH] Add language control and format clarity to extraction prompts

---
 lightrag/prompt.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 6f927d6d..f6842700 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -77,6 +77,7 @@ Extract entities and relationships from the input text to be processed.
 1.  **Strict Adherence to Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system prompt.
 2.  **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list.
 3.  **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant entities and relationships have been extracted and presented.
+4.  **Oputput Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.
 
 <Output>
 """
@@ -90,8 +91,11 @@ Based on the last extraction task, identify and extract any **missed or incorrec
     *   **Do NOT** re-output entities and relationships that were **correctly and fully** extracted in the last task.
     *   If an entity or relationship was **missed** in the last task, extract and output it now according to the system format.
     *   If an entity or relationship was **truncated, had missing fields, or was otherwise incorrectly formatted** in the last task, re-output the *corrected and complete* version in the specified format.
-3.  **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list.
-4.  **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant missing or corrected entities and relationships have been extracted and presented.
+3.  **Output Format - Entities:** Output a total of 4 fields for each entity, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `entity`.
+4.  **Output Format - Relationships:** Output a total of 5 fields for each relationship, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `relation`.
+5.  **Output Content Only:** Output *only* the extracted list of entities and relationships. Do not include any introductory or concluding remarks, explanations, or additional text before or after the list.
+6.  **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant missing or corrected entities and relationships have been extracted and presented.
+7.  **Oputput Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.
 
 <Output>
 """