remove <think> tags for entity and keyword extraction
This commit is contained in:
parent
88935d135f
commit
5f330ec11a
2 changed files with 12 additions and 3 deletions
|
|
@ -26,6 +26,7 @@ from .utils import (
|
||||||
get_conversation_turns,
|
get_conversation_turns,
|
||||||
use_llm_func_with_cache,
|
use_llm_func_with_cache,
|
||||||
update_chunk_cache_list,
|
update_chunk_cache_list,
|
||||||
|
remove_think_tags,
|
||||||
)
|
)
|
||||||
from .base import (
|
from .base import (
|
||||||
BaseGraphStorage,
|
BaseGraphStorage,
|
||||||
|
|
@ -1703,7 +1704,8 @@ async def extract_keywords_only(
|
||||||
result = await use_model_func(kw_prompt, keyword_extraction=True)
|
result = await use_model_func(kw_prompt, keyword_extraction=True)
|
||||||
|
|
||||||
# 6. Parse out JSON from the LLM response
|
# 6. Parse out JSON from the LLM response
|
||||||
match = re.search(r"\{.*\}", result, re.DOTALL)
|
result = remove_think_tags(result)
|
||||||
|
match = re.search(r"\{.*?\}", result, re.DOTALL)
|
||||||
if not match:
|
if not match:
|
||||||
logger.error("No JSON-like structure found in the LLM respond.")
|
logger.error("No JSON-like structure found in the LLM respond.")
|
||||||
return [], []
|
return [], []
|
||||||
|
|
|
||||||
|
|
@ -1465,6 +1465,11 @@ async def update_chunk_cache_list(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_think_tags(text: str) -> str:
|
||||||
|
"""Remove <think> tags from the text"""
|
||||||
|
return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()
|
||||||
|
|
||||||
|
|
||||||
async def use_llm_func_with_cache(
|
async def use_llm_func_with_cache(
|
||||||
input_text: str,
|
input_text: str,
|
||||||
use_llm_func: callable,
|
use_llm_func: callable,
|
||||||
|
|
@ -1531,6 +1536,7 @@ async def use_llm_func_with_cache(
|
||||||
kwargs["max_tokens"] = max_tokens
|
kwargs["max_tokens"] = max_tokens
|
||||||
|
|
||||||
res: str = await use_llm_func(input_text, **kwargs)
|
res: str = await use_llm_func(input_text, **kwargs)
|
||||||
|
res = remove_think_tags(res)
|
||||||
|
|
||||||
if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
|
if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
|
||||||
await save_to_cache(
|
await save_to_cache(
|
||||||
|
|
@ -1557,8 +1563,9 @@ async def use_llm_func_with_cache(
|
||||||
if max_tokens is not None:
|
if max_tokens is not None:
|
||||||
kwargs["max_tokens"] = max_tokens
|
kwargs["max_tokens"] = max_tokens
|
||||||
|
|
||||||
logger.info(f"Call LLM function with query text lenght: {len(input_text)}")
|
logger.info(f"Call LLM function with query text length: {len(input_text)}")
|
||||||
return await use_llm_func(input_text, **kwargs)
|
res = await use_llm_func(input_text, **kwargs)
|
||||||
|
return remove_think_tags(res)
|
||||||
|
|
||||||
|
|
||||||
def get_content_summary(content: str, max_length: int = 250) -> str:
|
def get_content_summary(content: str, max_length: int = 250) -> str:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue