Merge pull request #1949 from danielaskdd/main

Fix: remove query params from cache key generation for keyword extraction
2025-08-14 03:09:09 +08:00 · 2025-08-14 03:09:09 +08:00 · 1be1649f75
commit 1be1649f75
parent 5b0e26d9da 7fb11193b0
3 changed files with 17 additions and 19 deletions
--- a/env.example
+++ b/env.example
@ -134,7 +134,13 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai
-### Most Commont Parameters for Ollama Server
+### OpenAI Specific Parameters
 ### Apply frequency penalty to prevent the LLM from generating repetitive or looping outputs
 # OPENAI_LLM_FREQUENCY_PENALTY=1.1
 ### use the following command to see all supported options for openai and azure_openai
 ### lightrag-server --llm-binding openai --help
 ### Ollama Server Specific Parameters
 ### Time out in seconds, None for infinite timeout
 TIMEOUT=240
 ### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@ -1747,7 +1747,7 @@ async def kg_query(
        query_param.user_prompt or "",
        query_param.enable_rerank,
    )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
        hashing_kv, args_hash, query, query_param.mode, cache_type="query"
    )
    if cached_response is not None:
@ -1922,18 +1922,10 @@ async def extract_keywords_only(
    args_hash = compute_args_hash(
        param.mode,
        text,
        param.response_type,
        param.top_k,
        param.chunk_top_k,
        param.max_entity_tokens,
        param.max_relation_tokens,
        param.max_total_tokens,
        param.hl_keywords or [],
        param.ll_keywords or [],
        param.user_prompt or "",
        param.enable_rerank,
    )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
        hashing_kv, args_hash, text, param.mode, cache_type="keywords"
    )
    if cached_response is not None:
@ -3020,7 +3012,7 @@ async def naive_query(
        query_param.user_prompt or "",
        query_param.enable_rerank,
    )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
        hashing_kv, args_hash, query, query_param.mode, cache_type="query"
    )
    if cached_response is not None:
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@ -762,27 +762,27 @@ async def handle_cache(
    prompt,
    mode="default",
    cache_type=None,
-):
+) -> str | None:
    """Generic cache handling function with flattened cache keys"""
    if hashing_kv is None:
-        return None, None, None, None
+        return None
    if mode != "default":  # handle cache for all type of query
        if not hashing_kv.global_config.get("enable_llm_cache"):
-            return None, None, None, None
+            return None
    else:  # handle cache for entity extraction
        if not hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
-            return None, None, None, None
+            return None
    # Use flattened cache key format: {mode}:{cache_type}:{hash}
    flattened_key = generate_cache_key(mode, cache_type, args_hash)
    cache_entry = await hashing_kv.get_by_id(flattened_key)
    if cache_entry:
        logger.debug(f"Flattened cache hit(key:{flattened_key})")
-        return cache_entry["return"], None, None, None
+        return cache_entry["return"]
    logger.debug(f"Cache missed(mode:{mode} type:{cache_type})")
-    return None, None, None, None
+    return None
@dataclass
@ -1409,7 +1409,7 @@ async def use_llm_func_with_cache(
        # Generate cache key for this LLM call
        cache_key = generate_cache_key("default", cache_type, arg_hash)
-        cached_return, _1, _2, _3 = await handle_cache(
+        cached_return = await handle_cache(
            llm_response_cache,
            arg_hash,
            _prompt,