From 8bbf307aebe502e535b6010623438dace4e0be54 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 1 Sep 2025 10:35:06 +0800 Subject: [PATCH] Fix regex to match multiline content in extraction parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Remove non-greedy quantifier • Add DOTALL flag for multiline matching • Apply to both parsing functions • Enable cross-line content extraction --- lightrag/operate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 0cf1248e..607143a5 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -835,7 +835,7 @@ async def _parse_extraction_result( [context_base["record_delimiter"], context_base["completion_delimiter"]], ) for record in records: - record = re.search(r"\((.*?)\)", record) + record = re.search(r"\((.*)\)", record, re.DOTALL) if record is None: continue record = record.group(1) @@ -1749,7 +1749,7 @@ async def extract_entities( ) for record in records: - record = re.search(r"\((.*?)\)", record) + record = re.search(r"\((.*)\)", record, re.DOTALL) if record is None: continue record = record.group(1)