diff --git a/env.example b/env.example
index be054576..e322e582 100644
--- a/env.example
+++ b/env.example
@@ -134,7 +134,13 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai
 
-### Most Commont Parameters for Ollama Server
+### OpenAI Specific Parameters
+### Apply frequency penalty to prevent the LLM from generating repetitive or looping outputs
+# OPENAI_LLM_FREQUENCY_PENALTY=1.1
+### Use the following command to see all supported options for openai and azure_openai
+### lightrag-server --llm-binding openai --help
+
+### Ollama Server Specific Parameters
 ### Time out in seconds, None for infinite timeout
 TIMEOUT=240
 ### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
diff --git a/lightrag/operate.py b/lightrag/operate.py
index dd8d54be..7fb150f7 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1747,7 +1747,7 @@ async def kg_query(
         query_param.user_prompt or "",
         query_param.enable_rerank,
     )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
         hashing_kv, args_hash, query, query_param.mode, cache_type="query"
     )
     if cached_response is not None:
@@ -1922,18 +1922,10 @@ async def extract_keywords_only(
     args_hash = compute_args_hash(
         param.mode,
         text,
-        param.response_type,
-        param.top_k,
-        param.chunk_top_k,
-        param.max_entity_tokens,
-        param.max_relation_tokens,
-        param.max_total_tokens,
         param.hl_keywords or [],
         param.ll_keywords or [],
-        param.user_prompt or "",
-        param.enable_rerank,
     )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
         hashing_kv, args_hash, text, param.mode, cache_type="keywords"
     )
     if cached_response is not None:
@@ -3020,7 +3012,7 @@ async def naive_query(
         query_param.user_prompt or "",
         query_param.enable_rerank,
     )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
         hashing_kv, args_hash, query, query_param.mode, cache_type="query"
     )
     if cached_response is not None:
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 96b7bdc3..bea9962a 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -762,27 +762,27 @@ async def handle_cache(
     prompt,
     mode="default",
     cache_type=None,
-):
+) -> str | None:
     """Generic cache handling function with flattened cache keys"""
     if hashing_kv is None:
-        return None, None, None, None
+        return None
 
     if mode != "default":  # handle cache for all type of query
         if not hashing_kv.global_config.get("enable_llm_cache"):
-            return None, None, None, None
+            return None
     else:  # handle cache for entity extraction
         if not hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
-            return None, None, None, None
+            return None
 
     # Use flattened cache key format: {mode}:{cache_type}:{hash}
     flattened_key = generate_cache_key(mode, cache_type, args_hash)
     cache_entry = await hashing_kv.get_by_id(flattened_key)
     if cache_entry:
         logger.debug(f"Flattened cache hit(key:{flattened_key})")
-        return cache_entry["return"], None, None, None
+        return cache_entry["return"]
 
     logger.debug(f"Cache missed(mode:{mode} type:{cache_type})")
-    return None, None, None, None
+    return None
 
 
 @dataclass
@@ -1409,7 +1409,7 @@ async def use_llm_func_with_cache(
         # Generate cache key for this LLM call
         cache_key = generate_cache_key("default", cache_type, arg_hash)
 
-        cached_return, _1, _2, _3 = await handle_cache(
+        cached_return = await handle_cache(
             llm_response_cache,
             arg_hash,
             _prompt,