From 02e7462645632f0cd648e48cc38a7811fb0f5ce9 Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Wed, 10 Sep 2025 18:10:06 +0800
Subject: [PATCH] feat: enhance LLM output format tolerance for bracket
 processing

- Expand bracket tolerance to support additional characters: < > " '
- Implement symmetric handling for both leading and trailing characters
- Replace simple string matching with robust regex-based pattern detection
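- Maintain full backward compatibility with existing bracket formats
- Update extraction prompts: keep proper nouns in their original language
  and drop the backticks around the delimiter placeholders in rule 9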
---
 lightrag/operate.py | 27 ++++++++++++++++++++++-----
 lightrag/prompt.py  |  6 +++---
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 745b042c..304b8298 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -876,22 +876,39 @@ async def _process_extraction_result(
     )
 
     for record in records:
-        # Remove outer brackets (support English and Chinese brackets)
+        # Remove outer brackets (support English and Chinese brackets with enhanced tolerance)
         record = record.strip()
+        
+        # Define allowed leading and trailing characters
+        leading_trailing_chars = r'[`<>"\']*'
+        
+        # Handle leading characters before left bracket
         if record.startswith("(") or record.startswith("（"):
             record = record[1:]
         else:
-            if  record.startswith("`(") or record.startswith("`（"):
-                record = record[2:]
+            # Check for leading characters + left bracket pattern
+            leading_bracket_pattern = r'^' + leading_trailing_chars + r'([（(])'
+            match = re.search(leading_bracket_pattern, record)
+            if match:
+                # Extract content from the left bracket position
+                bracket_pos = match.start(1)
+                record = record[bracket_pos + 1:]
             else:
                 logger.warning(
                     f"{chunk_key}: Record starting bracket can not be found in extraction result"
                 )
+        
+        # Handle trailing characters after right bracket
         if record.endswith(")") or record.endswith("）"):
             record = record[:-1]
         else:
-            if record.endswith(")`") or record.endswith("）`"):
-                record = record[:-2]
+            # Check for right bracket + trailing characters pattern
+            trailing_bracket_pattern = r'([)）])' + leading_trailing_chars + r'$'
+            match = re.search(trailing_bracket_pattern, record)
+            if match:
+                # Extract content up to the right bracket position
+                bracket_pos = match.start(1)
+                record = record[:bracket_pos]
             else:
                 logger.warning(
                     f"{chunk_key}: Record ending bracket can not be found in extraction result"
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 9b44edb0..a2e87957 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -28,8 +28,8 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel
 5. **Relationship Order:** Prioritize relationships based on their significance to the intended meaning of input text, and output more crucial relationships first.
 6. **Avoid Pronouns:** For entity names and all descriptions, explicitly name the subject or object instead of using pronouns; avoid pronouns such as `this document`, `our company`, `I`, `you`, and `he/she`.
 7. **Undirectional Relationship:** Treat relationships as undirected; swapping the source and target entities does not constitute a new relationship. Avoid outputting duplicate relationships.
-8. **Language:** Output entity names, keywords and descriptions in {language}.
-9. **Delimiter:** Use `{record_delimiter}` as the entity or relationship list delimiter; output `{completion_delimiter}` when all the entities and relationships are extracted.
+8. **Language:** Output entity names, keywords and descriptions in {language}. Proper nouns, such as personal names, should not be translated. Please keep them in their original language.
+9. **Delimiter:** Use {record_delimiter} as the entity or relationship list delimiter; output {completion_delimiter} when all the entities and relationships are extracted.
 
 ---Examples---
 {examples}
@@ -49,7 +49,7 @@ Extract entities and relationships from the input text to be Processed.
 ---Instructions---
 1. Output entities and relationships, prioritized by their relevance to the input text's core meaning.
 2. Output `{completion_delimiter}` when all the entities and relationships are extracted.
-3. Ensure the output language is {language}.
+3. Ensure the output language is {language}. Proper nouns, such as personal names, should not be translated. Please keep them in their original language.
 
 <Output>
 """