Merge pull request #1949 from danielaskdd/main
Fix: remove query params from cache key generation for keyword extraction
This commit is contained in:
commit
1be1649f75
3 changed files with 17 additions and 19 deletions
|
|
@ -134,7 +134,13 @@ LLM_BINDING_API_KEY=your_api_key
|
||||||
# LLM_BINDING_API_KEY=your_api_key
|
# LLM_BINDING_API_KEY=your_api_key
|
||||||
# LLM_BINDING=openai
|
# LLM_BINDING=openai
|
||||||
|
|
||||||
### Most Common Parameters for Ollama Server
|
### OpenAI Specific Parameters
|
||||||
|
### Apply frequency penalty to prevent the LLM from generating repetitive or looping outputs
|
||||||
|
# OPENAI_LLM_FREQUENCY_PENALTY=1.1
|
||||||
|
### use the following command to see all supported options for openai and azure_openai
|
||||||
|
### lightrag-server --llm-binding openai --help
|
||||||
|
|
||||||
|
### Ollama Server Specific Parameters
|
||||||
### Time out in seconds, None for infinite timeout
|
### Time out in seconds, None for infinite timeout
|
||||||
TIMEOUT=240
|
TIMEOUT=240
|
||||||
### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
|
### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
|
||||||
|
|
|
||||||
|
|
@ -1747,7 +1747,7 @@ async def kg_query(
|
||||||
query_param.user_prompt or "",
|
query_param.user_prompt or "",
|
||||||
query_param.enable_rerank,
|
query_param.enable_rerank,
|
||||||
)
|
)
|
||||||
cached_response, quantized, min_val, max_val = await handle_cache(
|
cached_response = await handle_cache(
|
||||||
hashing_kv, args_hash, query, query_param.mode, cache_type="query"
|
hashing_kv, args_hash, query, query_param.mode, cache_type="query"
|
||||||
)
|
)
|
||||||
if cached_response is not None:
|
if cached_response is not None:
|
||||||
|
|
@ -1922,18 +1922,10 @@ async def extract_keywords_only(
|
||||||
args_hash = compute_args_hash(
|
args_hash = compute_args_hash(
|
||||||
param.mode,
|
param.mode,
|
||||||
text,
|
text,
|
||||||
param.response_type,
|
|
||||||
param.top_k,
|
|
||||||
param.chunk_top_k,
|
|
||||||
param.max_entity_tokens,
|
|
||||||
param.max_relation_tokens,
|
|
||||||
param.max_total_tokens,
|
|
||||||
param.hl_keywords or [],
|
param.hl_keywords or [],
|
||||||
param.ll_keywords or [],
|
param.ll_keywords or [],
|
||||||
param.user_prompt or "",
|
|
||||||
param.enable_rerank,
|
|
||||||
)
|
)
|
||||||
cached_response, quantized, min_val, max_val = await handle_cache(
|
cached_response = await handle_cache(
|
||||||
hashing_kv, args_hash, text, param.mode, cache_type="keywords"
|
hashing_kv, args_hash, text, param.mode, cache_type="keywords"
|
||||||
)
|
)
|
||||||
if cached_response is not None:
|
if cached_response is not None:
|
||||||
|
|
@ -3020,7 +3012,7 @@ async def naive_query(
|
||||||
query_param.user_prompt or "",
|
query_param.user_prompt or "",
|
||||||
query_param.enable_rerank,
|
query_param.enable_rerank,
|
||||||
)
|
)
|
||||||
cached_response, quantized, min_val, max_val = await handle_cache(
|
cached_response = await handle_cache(
|
||||||
hashing_kv, args_hash, query, query_param.mode, cache_type="query"
|
hashing_kv, args_hash, query, query_param.mode, cache_type="query"
|
||||||
)
|
)
|
||||||
if cached_response is not None:
|
if cached_response is not None:
|
||||||
|
|
|
||||||
|
|
@ -762,27 +762,27 @@ async def handle_cache(
|
||||||
prompt,
|
prompt,
|
||||||
mode="default",
|
mode="default",
|
||||||
cache_type=None,
|
cache_type=None,
|
||||||
):
|
) -> str | None:
|
||||||
"""Generic cache handling function with flattened cache keys"""
|
"""Generic cache handling function with flattened cache keys"""
|
||||||
if hashing_kv is None:
|
if hashing_kv is None:
|
||||||
return None, None, None, None
|
return None
|
||||||
|
|
||||||
if mode != "default": # handle cache for all type of query
|
if mode != "default": # handle cache for all type of query
|
||||||
if not hashing_kv.global_config.get("enable_llm_cache"):
|
if not hashing_kv.global_config.get("enable_llm_cache"):
|
||||||
return None, None, None, None
|
return None
|
||||||
else: # handle cache for entity extraction
|
else: # handle cache for entity extraction
|
||||||
if not hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
|
if not hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
|
||||||
return None, None, None, None
|
return None
|
||||||
|
|
||||||
# Use flattened cache key format: {mode}:{cache_type}:{hash}
|
# Use flattened cache key format: {mode}:{cache_type}:{hash}
|
||||||
flattened_key = generate_cache_key(mode, cache_type, args_hash)
|
flattened_key = generate_cache_key(mode, cache_type, args_hash)
|
||||||
cache_entry = await hashing_kv.get_by_id(flattened_key)
|
cache_entry = await hashing_kv.get_by_id(flattened_key)
|
||||||
if cache_entry:
|
if cache_entry:
|
||||||
logger.debug(f"Flattened cache hit(key:{flattened_key})")
|
logger.debug(f"Flattened cache hit(key:{flattened_key})")
|
||||||
return cache_entry["return"], None, None, None
|
return cache_entry["return"]
|
||||||
|
|
||||||
logger.debug(f"Cache missed(mode:{mode} type:{cache_type})")
|
logger.debug(f"Cache missed(mode:{mode} type:{cache_type})")
|
||||||
return None, None, None, None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -1409,7 +1409,7 @@ async def use_llm_func_with_cache(
|
||||||
# Generate cache key for this LLM call
|
# Generate cache key for this LLM call
|
||||||
cache_key = generate_cache_key("default", cache_type, arg_hash)
|
cache_key = generate_cache_key("default", cache_type, arg_hash)
|
||||||
|
|
||||||
cached_return, _1, _2, _3 = await handle_cache(
|
cached_return = await handle_cache(
|
||||||
llm_response_cache,
|
llm_response_cache,
|
||||||
arg_hash,
|
arg_hash,
|
||||||
_prompt,
|
_prompt,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue