From 9a9674d590a43f1c02c7fed164057a33d13db9f2 Mon Sep 17 00:00:00 2001
From: frankj <frankxjkuang@gmail.com>
Date: Tue, 8 Jul 2025 10:24:19 +0800
Subject: [PATCH 1/2] Fix incorrect file path (404 Not Found)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue Description
The Chinese-version badge link in README-zh.md pointed to README_zh.md, which returns a 404 error. The actual file is named README-zh.md (with a hyphen, not an underscore), so the path in the link was incorrect.

Fix Details
Corrected the broken link from README_zh.md to the correct path README-zh.md.

Verification Method
After modification, the target file opens normally in the browser.

Hope this fix helps users access the Chinese documentation properly—thanks for the review!
---
 README-zh.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README-zh.md b/README-zh.md
index 45335489..e9599099 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -30,7 +30,7 @@
       <a href="https://github.com/HKUDS/LightRAG/issues/285"><img src="https://img.shields.io/badge/💬微信群-交流-07c160?style=for-the-badge&logo=wechat&logoColor=white&labelColor=1a1a2e"></a>
     </p>
     <p>
-      <a href="README_zh.md"><img src="https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge"></a>
+      <a href="README-zh.md"><img src="https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge"></a>
       <a href="README.md"><img src="https://img.shields.io/badge/🇺🇸English-1a1a2e?style=for-the-badge"></a>
     </p>
   </div>

From 5f330ec11a487753e9aa06a15fdeb5df782d9d49 Mon Sep 17 00:00:00 2001
From: SLKun <summerslyb@gmail.com>
Date: Mon, 7 Jul 2025 10:31:46 +0800
Subject: [PATCH 2/2] remove <think> tag for entities and keywords extraction

---
 lightrag/operate.py |  4 +++-
 lightrag/utils.py   | 11 +++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 88837435..4e219cf8 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -26,6 +26,7 @@ from .utils import (
     get_conversation_turns,
     use_llm_func_with_cache,
     update_chunk_cache_list,
+    remove_think_tags,
 )
 from .base import (
     BaseGraphStorage,
@@ -1703,7 +1704,8 @@ async def extract_keywords_only(
     result = await use_model_func(kw_prompt, keyword_extraction=True)
 
     # 6. Parse out JSON from the LLM response
-    match = re.search(r"\{.*\}", result, re.DOTALL)
+    result = remove_think_tags(result)
+    match = re.search(r"\{.*?\}", result, re.DOTALL)
     if not match:
         logger.error("No JSON-like structure found in the LLM respond.")
         return [], []
diff --git a/lightrag/utils.py b/lightrag/utils.py
index c6e2def9..386de3ab 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1465,6 +1465,11 @@ async def update_chunk_cache_list(
         )
 
 
+def remove_think_tags(text: str) -> str:
+    """Strip a leading <think>...</think> block (or a bare leading <think> tag) and outer whitespace."""
+    return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()
+
+
 async def use_llm_func_with_cache(
     input_text: str,
     use_llm_func: callable,
@@ -1531,6 +1536,7 @@ async def use_llm_func_with_cache(
             kwargs["max_tokens"] = max_tokens
 
         res: str = await use_llm_func(input_text, **kwargs)
+        res = remove_think_tags(res)
 
         if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
             await save_to_cache(
@@ -1557,8 +1563,9 @@ async def use_llm_func_with_cache(
     if max_tokens is not None:
         kwargs["max_tokens"] = max_tokens
 
-    logger.info(f"Call LLM function with query text lenght: {len(input_text)}")
-    return await use_llm_func(input_text, **kwargs)
+    logger.info(f"Call LLM function with query text length: {len(input_text)}")
+    res = await use_llm_func(input_text, **kwargs)
+    return remove_think_tags(res)
 
 
 def get_content_summary(content: str, max_length: int = 250) -> str: