From 8bbf307aebe502e535b6010623438dace4e0be54 Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Mon, 1 Sep 2025 10:35:06 +0800
Subject: [PATCH] Fix regex to match multiline content in extraction parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Make the capture greedy (`.*?` → `.*`) so it extends to the last closing parenthesis
• Add re.DOTALL flag so `.` also matches newline characters
• Apply to both _parse_extraction_result and extract_entities
• Enable cross-line content extraction
---
 lightrag/operate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 0cf1248e..607143a5 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -835,7 +835,7 @@ async def _parse_extraction_result(
         [context_base["record_delimiter"], context_base["completion_delimiter"]],
     )
     for record in records:
-        record = re.search(r"\((.*?)\)", record)
+        record = re.search(r"\((.*)\)", record, re.DOTALL)
         if record is None:
             continue
         record = record.group(1)
@@ -1749,7 +1749,7 @@ async def extract_entities(
         )
 
         for record in records:
-            record = re.search(r"\((.*?)\)", record)
+            record = re.search(r"\((.*)\)", record, re.DOTALL)
             if record is None:
                 continue
             record = record.group(1)