diff --git a/lightrag/operate.py b/lightrag/operate.py index dd323557..faab7f26 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -3266,10 +3266,16 @@ async def extract_keywords_only( It ONLY extracts keywords (hl_keywords, ll_keywords). """ - # 1. Handle cache if needed - add cache type for keywords + # 1. Build the examples + examples = "\n".join(PROMPTS["keywords_extraction_examples"]) + + language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE) + + # 2. Handle cache if needed - add cache type for keywords args_hash = compute_args_hash( param.mode, text, + language, ) cached_result = await handle_cache( hashing_kv, args_hash, text, param.mode, cache_type="keywords" @@ -3286,11 +3292,6 @@ async def extract_keywords_only( "Invalid cache format for keywords, proceeding with extraction" ) - # 2. Build the examples - examples = "\n".join(PROMPTS["keywords_extraction_examples"]) - - language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE) - # 3. Build the keyword-extraction prompt kw_prompt = PROMPTS["keywords_extraction"].format( query=text, diff --git a/lightrag/prompt.py b/lightrag/prompt.py index 6230197b..dcd829d4 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -384,6 +384,7 @@ Given a user query, your task is to extract two distinct types of keywords: 2. **Source of Truth**: All keywords must be explicitly derived from the user query, with both high-level and low-level keyword categories are required to contain content. 3. **Concise & Meaningful**: Keywords should be concise words or meaningful phrases. Prioritize multi-word phrases when they represent a single concept. For example, from "latest financial report of Apple Inc.", you should extract "latest financial report" and "Apple Inc." rather than "latest", "financial", "report", and "Apple". 4. **Handle Edge Cases**: For queries that are too simple, vague, or nonsensical (e.g., "hello", "ok", "asdfghjkl"), you must return a JSON object with empty lists for both keyword types. +5. **Language**: All extracted keywords MUST be in {language}. Proper nouns (e.g., personal names, place names, organization names) should be kept in their original language. ---Examples--- {examples}