From 114b400905469c03393f8ffff9420606307ee6b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20MANSUY?= <raphael.mansuy@gmail.com>
Date: Thu, 4 Dec 2025 19:18:38 +0800
Subject: [PATCH] cherry-pick f24a2616

---
 lightrag/api/routers/query_routes.py | 40 ++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/lightrag/api/routers/query_routes.py b/lightrag/api/routers/query_routes.py
index c9436004..5f6a2955 100644
--- a/lightrag/api/routers/query_routes.py
+++ b/lightrag/api/routers/query_routes.py
@@ -75,6 +75,16 @@ class QueryRequest(BaseModel):
         ge=1,
     )
 
+    hl_keywords: list[str] = Field(
+        default_factory=list,
+        description="List of high-level keywords to prioritize in retrieval. Leave empty to use the LLM to generate the keywords.",
+    )
+
+    ll_keywords: list[str] = Field(
+        default_factory=list,
+        description="List of low-level keywords to refine retrieval focus. Leave empty to use the LLM to generate the keywords.",
+    )
+
     conversation_history: Optional[List[Dict[str, Any]]] = Field(
         default=None,
         description="Stores past conversation history to maintain context. Format: [{'role': 'user/assistant', 'content': 'message'}].",
@@ -357,6 +367,16 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60, rag
         }
         ```
 
+        Skip the LLM keyword-generation step by providing high-level and low-level keywords directly:
+        ```json
+        {
+            "query": "What is Retrieval-Augmented-Generation?",
+            "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
+            "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
+            "mode": "mix"
+        }
+        ```
+
         Advanced query with references:
         ```json
         {
@@ -574,6 +594,16 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60, rag
         }
         ```
 
+        Skip the LLM keyword-generation step by providing high-level and low-level keywords directly:
+        ```json
+        {
+            "query": "What is Retrieval-Augmented-Generation?",
+            "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
+            "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
+            "mode": "mix"
+        }
+        ```
+
         Complete response query:
         ```json
         {
@@ -1063,6 +1093,16 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60, rag
         }
         ```
 
+        Skip the LLM keyword-generation step by providing high-level and low-level keywords directly:
+        ```json
+        {
+            "query": "What is Retrieval-Augmented-Generation?",
+            "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
+            "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
+            "mode": "mix"
+        }
+        ```
+
         **Response Analysis:**
         - **Empty arrays**: Normal for certain modes (e.g., naive mode has no entities/relationships)
         - **Processing info**: Shows retrieval statistics and token usage