From 9a62101e9de576c840013302a7bbde76d7b852de Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 14 Aug 2025 01:50:27 +0800
Subject: [PATCH 1/3] Add OpenAI frequency penalty sample env params

---
 env.example | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/env.example b/env.example
index be054576..e322e582 100644
--- a/env.example
+++ b/env.example
@@ -134,7 +134,13 @@ LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING_API_KEY=your_api_key
 # LLM_BINDING=openai
 
-### Most Commont Parameters for Ollama Server
+### OpenAI Specific Parameters
+### Apply frequency penalty to prevent the LLM from generating repetitive or looping outputs
+# OPENAI_LLM_FREQUENCY_PENALTY=1.1
+### Use the following command to see all supported options for openai and azure_openai
+### lightrag-server --llm-binding openai --help
+
+### Ollama Server Specific Parameters
 ### Time out in seconds, None for infinite timeout
 TIMEOUT=240
 ### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
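The patch above only documents the new variable in env.example. As a minimal illustration of how a frequency penalty read from the environment typically reaches an OpenAI chat completion call, consider the sketch below; the model name and the surrounding wiring are assumptions for the example, not LightRAG's actual openai binding code:

    import os
    from openai import OpenAI

    client = OpenAI()  # picks up OPENAI_API_KEY from the environment

    # Forward the penalty only when the variable is set. OpenAI accepts
    # values in [-2.0, 2.0]; positive values discourage repeated tokens.
    penalty = os.environ.get("OPENAI_LLM_FREQUENCY_PENALTY")
    extra = {"frequency_penalty": float(penalty)} if penalty else {}

    response = client.chat.completions.create(
        model="gpt-4o-mini",  # assumed model for the example
        messages=[{"role": "user", "content": "Name three graph databases."}],
        **extra,
    )
    print(response.choices[0].message.content)
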
From 331dcf050910618b9bb5e3028da4597770199f72 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 14 Aug 2025 02:36:01 +0800
Subject: [PATCH 2/3] Remove query params from cache key generation for keyword extraction

---
 lightrag/operate.py | 14 +++-----------
 lightrag/utils.py   | 14 +++++++-------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index dd8d54be..7fb150f7 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1747,7 +1747,7 @@ async def kg_query(
         query_param.user_prompt or "",
         query_param.enable_rerank,
     )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
         hashing_kv, args_hash, query, query_param.mode, cache_type="query"
     )
     if cached_response is not None:
@@ -1922,18 +1922,10 @@ async def extract_keywords_only(
     args_hash = compute_args_hash(
         param.mode,
         text,
-        param.response_type,
-        param.top_k,
-        param.chunk_top_k,
-        param.max_entity_tokens,
-        param.max_relation_tokens,
-        param.max_total_tokens,
         param.hl_keywords or [],
         param.ll_keywords or [],
-        param.user_prompt or "",
-        param.enable_rerank,
     )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
         hashing_kv, args_hash, text, param.mode, cache_type="keywords"
     )
     if cached_response is not None:
@@ -3020,7 +3012,7 @@ async def naive_query(
         query_param.user_prompt or "",
         query_param.enable_rerank,
     )
-    cached_response, quantized, min_val, max_val = await handle_cache(
+    cached_response = await handle_cache(
         hashing_kv, args_hash, query, query_param.mode, cache_type="query"
     )
     if cached_response is not None:
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 96b7bdc3..5216fac1 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -762,27 +762,27 @@ async def handle_cache(
     prompt,
     mode="default",
     cache_type=None,
-):
+) -> str|None:
     """Generic cache handling function with flattened cache keys"""
     if hashing_kv is None:
-        return None, None, None, None
+        return None
 
     if mode != "default":
         # handle cache for all type of query
         if not hashing_kv.global_config.get("enable_llm_cache"):
-            return None, None, None, None
+            return None
     else:
         # handle cache for entity extraction
         if not hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
-            return None, None, None, None
+            return None
 
     # Use flattened cache key format: {mode}:{cache_type}:{hash}
     flattened_key = generate_cache_key(mode, cache_type, args_hash)
     cache_entry = await hashing_kv.get_by_id(flattened_key)
     if cache_entry:
         logger.debug(f"Flattened cache hit(key:{flattened_key})")
-        return cache_entry["return"], None, None, None
+        return cache_entry["return"]
 
     logger.debug(f"Cache missed(mode:{mode} type:{cache_type})")
-    return None, None, None, None
+    return None
 
 
 @dataclass
@@ -1409,7 +1409,7 @@ async def use_llm_func_with_cache(
 
     # Generate cache key for this LLM call
     cache_key = generate_cache_key("default", cache_type, arg_hash)
-    cached_return, _1, _2, _3 = await handle_cache(
+    cached_return = await handle_cache(
         llm_response_cache,
         arg_hash,
         _prompt,

From 7fb11193b0efed6bd7384e328580ca23e36dbd43 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 14 Aug 2025 03:07:29 +0800
Subject: [PATCH 3/3] Fix linting

---
 lightrag/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/utils.py b/lightrag/utils.py
index 5216fac1..bea9962a 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -762,7 +762,7 @@ async def handle_cache(
     prompt,
     mode="default",
     cache_type=None,
-) -> str|None:
+) -> str | None:
     """Generic cache handling function with flattened cache keys"""
     if hashing_kv is None:
         return None
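Taken together, the series simplifies handle_cache to return the cached string or None and narrows the keyword-extraction cache key to the mode, the query text, and the seed keywords. The stand-in below mimics compute_args_hash plus the flattened {mode}:{cache_type}:{hash} key format to show the practical effect: queries that differ only in retrieval settings now share one keyword cache entry. The QueryParam fields and the hashing are simplified assumptions, not the real implementation:

    import hashlib
    from dataclasses import dataclass, field

    @dataclass
    class QueryParam:  # abbreviated stand-in for LightRAG's QueryParam
        mode: str = "hybrid"
        top_k: int = 60  # retrieval-only: no longer part of the keyword cache key
        hl_keywords: list = field(default_factory=list)
        ll_keywords: list = field(default_factory=list)

    def keyword_cache_key(param: QueryParam, text: str) -> str:
        # stand-in for compute_args_hash + generate_cache_key
        args = repr((param.mode, text, param.hl_keywords or [], param.ll_keywords or []))
        digest = hashlib.md5(args.encode()).hexdigest()
        return f"{param.mode}:keywords:{digest}"

    key_a = keyword_cache_key(QueryParam(top_k=60), "what is a graph?")
    key_b = keyword_cache_key(QueryParam(top_k=5), "what is a graph?")
    assert key_a == key_b  # retrieval settings no longer fragment the cache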