diff --git a/README-zh.md b/README-zh.md
index 40dfc777..2d08ff51 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -272,7 +272,6 @@ if __name__ == "__main__":
 | **enable_llm_cache** | `bool` | 如果为`TRUE`,将LLM结果存储在缓存中;重复的提示返回缓存的响应 | `TRUE` |
 | **enable_llm_cache_for_entity_extract** | `bool` | 如果为`TRUE`,将实体提取的LLM结果存储在缓存中;适合初学者调试应用程序 | `TRUE` |
 | **addon_params** | `dict` | 附加参数,例如`{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"]}`:设置示例限制、输出语言和文档处理的批量大小 | `example_number: 所有示例, language: English` |
-| **convert_response_to_json_func** | `callable` | 未使用 | `convert_response_to_json` |
 | **embedding_cache_config** | `dict` | 问答缓存的配置。包含三个参数:`enabled`:布尔值,启用/禁用缓存查找功能。启用时,系统将在生成新答案之前检查缓存的响应。`similarity_threshold`:浮点值(0-1),相似度阈值。当新问题与缓存问题的相似度超过此阈值时,将直接返回缓存的答案而不调用LLM。`use_llm_check`:布尔值,启用/禁用LLM相似度验证。启用时,在返回缓存答案之前,将使用LLM作为二次检查来验证问题之间的相似度。 | 默认:`{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
diff --git a/README.md b/README.md
index a6002e6a..bf319763 100644
--- a/README.md
+++ b/README.md
@@ -279,7 +279,6 @@ A full list of LightRAG init parameters:
 | **enable_llm_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
 | **enable_llm_cache_for_entity_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
 | **addon_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"]}`: sets example limit, entity/relation extraction output language | `example_number: all examples, language: English` |
-| **convert_response_to_json_func** | `callable` | Not used | `convert_response_to_json` |
 | **embedding_cache_config** | `dict` | Configuration for question-answer caching. Contains three parameters: `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers. `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM. `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index c26e9b66..ae42e5df 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -81,7 +81,6 @@ from .utils import (
     EmbeddingFunc,
     always_get_an_event_loop,
     compute_mdhash_id,
-    convert_response_to_json,
     lazy_external_import,
     priority_limit_async_func_call,
     get_content_summary,
@@ -341,15 +340,6 @@ class LightRAG:
     # Storages Management
     # ---

-    convert_response_to_json_func: Callable[[str], dict[str, Any]] = field(
-        default_factory=lambda: convert_response_to_json
-    )
-    """
-    Custom function for converting LLM responses to JSON format.
-
-    The default function is :func:`.utils.convert_response_to_json`.
- """ - cosine_better_than_threshold: float = field( default=float(os.getenv("COSINE_THRESHOLD", 0.2)) ) diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py index ecec0fcc..60d2c18e 100644 --- a/lightrag/llm/azure_openai.py +++ b/lightrag/llm/azure_openai.py @@ -23,7 +23,6 @@ from tenacity import ( from lightrag.utils import ( wrap_embedding_func_with_attrs, - locate_json_string_body_from_string, safe_unicode_decode, ) @@ -108,7 +107,7 @@ async def azure_openai_complete_if_cache( async def azure_openai_complete( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: - keyword_extraction = kwargs.pop("keyword_extraction", None) + kwargs.pop("keyword_extraction", None) result = await azure_openai_complete_if_cache( os.getenv("LLM_MODEL", "gpt-4o-mini"), prompt, @@ -116,8 +115,6 @@ async def azure_openai_complete( history_messages=history_messages, **kwargs, ) - if keyword_extraction: # TODO: use JSON API - return locate_json_string_body_from_string(result) return result diff --git a/lightrag/llm/bedrock.py b/lightrag/llm/bedrock.py index e1edc7ff..1640abbb 100644 --- a/lightrag/llm/bedrock.py +++ b/lightrag/llm/bedrock.py @@ -15,10 +15,6 @@ from tenacity import ( retry_if_exception_type, ) -from lightrag.utils import ( - locate_json_string_body_from_string, -) - class BedrockError(Exception): """Generic error for issues related to Amazon Bedrock""" @@ -96,7 +92,7 @@ async def bedrock_complete_if_cache( async def bedrock_complete( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: - keyword_extraction = kwargs.pop("keyword_extraction", None) + kwargs.pop("keyword_extraction", None) model_name = kwargs["hashing_kv"].global_config["llm_model_name"] result = await bedrock_complete_if_cache( model_name, @@ -105,8 +101,6 @@ async def bedrock_complete( history_messages=history_messages, **kwargs, ) - if keyword_extraction: # TODO: use JSON API - return locate_json_string_body_from_string(result) return result diff --git a/lightrag/llm/hf.py b/lightrag/llm/hf.py index 8a648ea9..7adf1570 100644 --- a/lightrag/llm/hf.py +++ b/lightrag/llm/hf.py @@ -24,9 +24,6 @@ from lightrag.exceptions import ( RateLimitError, APITimeoutError, ) -from lightrag.utils import ( - locate_json_string_body_from_string, -) import torch import numpy as np @@ -119,7 +116,7 @@ async def hf_model_if_cache( async def hf_model_complete( prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: - keyword_extraction = kwargs.pop("keyword_extraction", None) + kwargs.pop("keyword_extraction", None) model_name = kwargs["hashing_kv"].global_config["llm_model_name"] result = await hf_model_if_cache( model_name, @@ -128,8 +125,6 @@ async def hf_model_complete( history_messages=history_messages, **kwargs, ) - if keyword_extraction: # TODO: use JSON API - return locate_json_string_body_from_string(result) return result diff --git a/lightrag/llm/llama_index_impl.py b/lightrag/llm/llama_index_impl.py index a88d830f..2e50e33e 100644 --- a/lightrag/llm/llama_index_impl.py +++ b/lightrag/llm/llama_index_impl.py @@ -21,7 +21,6 @@ from tenacity import ( ) from lightrag.utils import ( wrap_embedding_func_with_attrs, - locate_json_string_body_from_string, ) from lightrag.exceptions import ( APIConnectionError, @@ -157,7 +156,7 @@ async def llama_index_complete( if history_messages is None: history_messages = [] - keyword_extraction = kwargs.pop("keyword_extraction", None) + kwargs.pop("keyword_extraction", None) 
     result = await llama_index_complete_if_cache(
         kwargs.get("llm_instance"),
         prompt,
@@ -165,8 +164,6 @@
         history_messages=history_messages,
         **kwargs,
     )
-    if keyword_extraction:
-        return locate_json_string_body_from_string(result)
     return result
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index cedd804d..981669b6 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -27,7 +27,6 @@ from tenacity import (
 )
 from lightrag.utils import (
     wrap_embedding_func_with_attrs,
-    locate_json_string_body_from_string,
     safe_unicode_decode,
     logger,
 )
@@ -418,7 +417,7 @@ async def nvidia_openai_complete(
 ) -> str:
     if history_messages is None:
         history_messages = []
-    keyword_extraction = kwargs.pop("keyword_extraction", None)
+    kwargs.pop("keyword_extraction", None)
     result = await openai_complete_if_cache(
         "nvidia/llama-3.1-nemotron-70b-instruct",  # context length 128k
         prompt,
@@ -427,8 +426,6 @@
         history_messages=history_messages,
         base_url="https://integrate.api.nvidia.com/v1",
         **kwargs,
     )
-    if keyword_extraction:  # TODO: use JSON API
-        return locate_json_string_body_from_string(result)
     return result
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 78836176..45153bc5 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -5,6 +5,7 @@
 import asyncio
 import json
 import re
 import os
+import json_repair
 from typing import Any, AsyncIterator
 from collections import Counter, defaultdict
@@ -1781,10 +1782,10 @@ async def extract_keywords_only(
     )
     if cached_response is not None:
         try:
-            keywords_data = json.loads(cached_response)
-            return keywords_data["high_level_keywords"], keywords_data[
-                "low_level_keywords"
-            ]
+            keywords_data = json_repair.loads(cached_response)
+            return keywords_data.get("high_level_keywords", []), keywords_data.get(
+                "low_level_keywords", []
+            )
         except (json.JSONDecodeError, KeyError):
             logger.warning(
                 "Invalid cache format for keywords, proceeding with extraction"
             )
@@ -1832,12 +1833,11 @@ async def extract_keywords_only(
     # 6. Parse out JSON from the LLM response
     result = remove_think_tags(result)
-    match = re.search(r"\{.*?\}", result, re.DOTALL)
-    if not match:
-        logger.error("No JSON-like structure found in the LLM respond.")
-        return [], []
     try:
-        keywords_data = json.loads(match.group(0))
+        keywords_data = json_repair.loads(result)
+        if not keywords_data:
+            logger.error("No JSON-like structure found in the LLM response.")
+            return [], []
     except json.JSONDecodeError as e:
         logger.error(f"JSON parsing error: {e}")
         logger.error(f"LLM respond: {result}")
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 599fcc9c..354ca0a3 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -248,43 +248,6 @@ class EmbeddingFunc:
         return await self.func(*args, **kwargs)


-def locate_json_string_body_from_string(content: str) -> str | None:
-    """Locate the JSON string body from a string"""
-    try:
-        maybe_json_str = re.search(r"{.*}", content, re.DOTALL)
-        if maybe_json_str is not None:
-            maybe_json_str = maybe_json_str.group(0)
-            maybe_json_str = maybe_json_str.replace("\\n", "")
-            maybe_json_str = maybe_json_str.replace("\n", "")
-            maybe_json_str = maybe_json_str.replace("'", '"')
-            # json.loads(maybe_json_str)  # don't check here, cannot validate schema after all
-            return maybe_json_str
-    except Exception:
-        pass
-    # try:
-    #     content = (
-    #         content.replace(kw_prompt[:-1], "")
-    #         .replace("user", "")
-    #         .replace("model", "")
-    #         .strip()
-    #     )
-    #     maybe_json_str = "{" + content.split("{")[1].split("}")[0] + "}"
-    #     json.loads(maybe_json_str)
-
-    return None
-
-
-def convert_response_to_json(response: str) -> dict[str, Any]:
-    json_str = locate_json_string_body_from_string(response)
-    assert json_str is not None, f"Unable to parse JSON from response: {response}"
-    try:
-        data = json.loads(json_str)
-        return data
-    except json.JSONDecodeError as e:
-        logger.error(f"Failed to parse JSON: {json_str}")
-        raise e from None
-
-
 def compute_args_hash(*args: Any) -> str:
     """Compute a hash for the given arguments.
     Args:
diff --git a/pyproject.toml b/pyproject.toml
index 081c603c..18e525d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "configparser",
     "dotenv",
     "future",
+    "json-repair",
     "nano-vectordb",
     "networkx",
     "numpy",
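For context on the core change: `json_repair.loads` (the call this patch introduces in `lightrag/operate.py`) tolerates the malformed JSON that LLMs routinely emit, which is why the regex-based `locate_json_string_body_from_string` helper and `convert_response_to_json` can be deleted outright. Below is a minimal sketch of the difference, using a made-up payload rather than anything from the PR:

```python
import json
import json_repair

# Hypothetical LLM output: single quotes and a trailing comma, both of which
# the strict json module rejects.
raw = "{'high_level_keywords': ['graph RAG', 'retrieval'], 'low_level_keywords': ['entity', 'relation'],}"

try:
    json.loads(raw)
except json.JSONDecodeError as e:
    print(f"json.loads failed: {e}")

# json_repair fixes the quoting and the trailing comma before parsing.
data = json_repair.loads(raw)

# Mirroring the patched cache path in extract_keywords_only: .get() with a
# default list avoids a KeyError when a key is missing from the repaired dict.
print(data.get("high_level_keywords", []))  # ['graph RAG', 'retrieval']
print(data.get("low_level_keywords", []))   # ['entity', 'relation']
```

Note that on hopeless input `json_repair.loads` returns an empty result rather than raising, which is what the new `if not keywords_data:` guard in `extract_keywords_only` accounts for.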