From fd2ff358bf664d12bdeb960ff10274e3562e9afd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Carlos?= <eng.jcfneto@gmail.com>
Date: Sat, 6 Dec 2025 09:50:12 -0300
Subject: [PATCH 1/2] fix(prompt): use language parameter in
 keywords_extraction prompt

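The `language` argument was already passed when formatting the
keywords_extraction prompt, but the template contained no {language}
placeholder, so the value was silently ignored. Add a "Language" rule
that uses the placeholder so keywords are extracted in the configured
language, while proper nouns are kept in their original language.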
---
 lightrag/prompt.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index bf514fe8..64e105c2 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -374,6 +374,7 @@ Given a user query, your task is to extract two distinct types of keywords:
 2. **Source of Truth**: All keywords must be explicitly derived from the user query, with both high-level and low-level keyword categories are required to contain content.
 3. **Concise & Meaningful**: Keywords should be concise words or meaningful phrases. Prioritize multi-word phrases when they represent a single concept. For example, from "latest financial report of Apple Inc.", you should extract "latest financial report" and "Apple Inc." rather than "latest", "financial", "report", and "Apple".
 4. **Handle Edge Cases**: For queries that are too simple, vague, or nonsensical (e.g., "hello", "ok", "asdfghjkl"), you must return a JSON object with empty lists for both keyword types.
+5. **Language**: All extracted keywords MUST be in {language}. Proper nouns (e.g., personal names, place names, organization names) should be kept in their original language.
 
 ---Examples---
 {examples}

From d8cd48f43b9fa48b94be16300df9039d05c72c2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Carlos?= <eng.jcfneto@gmail.com>
Date: Thu, 11 Dec 2025 12:43:42 -0300
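Subject: [PATCH 2/2] fix(cache): include language parameter in keywords
 extraction cache key

The keywords cache key was computed from the query mode and text only,
so keywords cached under one configured language could be returned
after the language setting changed. Resolve the language before the
cache lookup and include it in the hash arguments.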
---
 lightrag/operate.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index c6724974..05940899 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -3264,10 +3264,16 @@ async def extract_keywords_only(
     It ONLY extracts keywords (hl_keywords, ll_keywords).
     """
 
-    # 1. Handle cache if needed - add cache type for keywords
+    # 1. Build the examples
+    examples = "\n".join(PROMPTS["keywords_extraction_examples"])
+
+    language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
+
+    # 2. Handle cache if needed - add cache type for keywords
     args_hash = compute_args_hash(
         param.mode,
         text,
+        language,
     )
     cached_result = await handle_cache(
         hashing_kv, args_hash, text, param.mode, cache_type="keywords"
@@ -3284,11 +3290,6 @@ async def extract_keywords_only(
                 "Invalid cache format for keywords, proceeding with extraction"
             )
 
-    # 2. Build the examples
-    examples = "\n".join(PROMPTS["keywords_extraction_examples"])
-
-    language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
-
     # 3. Build the keyword-extraction prompt
     kw_prompt = PROMPTS["keywords_extraction"].format(
         query=text,