From 9a9674d590a43f1c02c7fed164057a33d13db9f2 Mon Sep 17 00:00:00 2001 From: frankj Date: Tue, 8 Jul 2025 10:24:19 +0800 Subject: [PATCH 1/2] Fix incorrect file path (404 Not Found) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue Description A 404 error occurred when accessing the repository link pointing to README_zh.md. Upon inspection, the actual file path is README-zh.md, indicating an incorrect path reference in the original link. Fix Details Corrected the broken link from README_zh.md to the correct path README-zh.md. Verification Method After modification, the target file opens normally in the browser. Hope this fix helps users access the Chinese documentation properly—thanks for the review! --- README-zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README-zh.md b/README-zh.md index 45335489..e9599099 100644 --- a/README-zh.md +++ b/README-zh.md @@ -30,7 +30,7 @@

- +

From 5f330ec11a487753e9aa06a15fdeb5df782d9d49 Mon Sep 17 00:00:00 2001 From: SLKun Date: Mon, 7 Jul 2025 10:31:46 +0800 Subject: [PATCH 2/2] remove <think> tag for entities and keywords extraction --- lightrag/operate.py | 4 +++- lightrag/utils.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 88837435..4e219cf8 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -26,6 +26,7 @@ from .utils import ( get_conversation_turns, use_llm_func_with_cache, update_chunk_cache_list, + remove_think_tags, ) from .base import ( BaseGraphStorage, @@ -1703,7 +1704,8 @@ async def extract_keywords_only( result = await use_model_func(kw_prompt, keyword_extraction=True) # 6. Parse out JSON from the LLM response - match = re.search(r"\{.*\}", result, re.DOTALL) + result = remove_think_tags(result) + match = re.search(r"\{.*?\}", result, re.DOTALL) if not match: logger.error("No JSON-like structure found in the LLM respond.") return [], [] diff --git a/lightrag/utils.py b/lightrag/utils.py index c6e2def9..386de3ab 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -1465,6 +1465,11 @@ async def update_chunk_cache_list( ) +def remove_think_tags(text: str) -> str: + """Remove <think> tags from the text""" + return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip() + + async def use_llm_func_with_cache( input_text: str, use_llm_func: callable, @@ -1531,6 +1536,7 @@ async def use_llm_func_with_cache( kwargs["max_tokens"] = max_tokens res: str = await use_llm_func(input_text, **kwargs) + res = remove_think_tags(res) if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"): await save_to_cache( @@ -1557,8 +1563,9 @@ async def use_llm_func_with_cache( if max_tokens is not None: kwargs["max_tokens"] = max_tokens - logger.info(f"Call LLM function with query text lenght: {len(input_text)}") - return await use_llm_func(input_text, **kwargs) + logger.info(f"Call LLM function with query text 
length: {len(input_text)}") + res = await use_llm_func(input_text, **kwargs) + return remove_think_tags(res) def get_content_summary(content: str, max_length: int = 250) -> str: