cherry-pick f24a2616

2025-12-04 19:18:38 +08:00 · 2025-12-04 19:18:38 +08:00 · 114b400905
commit 114b400905
parent 96715f92e1
1 changed files with 40 additions and 0 deletions
--- a/lightrag/api/routers/query_routes.py
+++ b/lightrag/api/routers/query_routes.py
@ -75,6 +75,16 @@ class QueryRequest(BaseModel):
        ge=1,
    )

+    hl_keywords: list[str] = Field(
+        default_factory=list,
+        description="List of high-level keywords to prioritize in retrieval. Leave empty to use the LLM to generate the keywords.",
+    )
+
+    ll_keywords: list[str] = Field(
+        default_factory=list,
+        description="List of low-level keywords to refine retrieval focus. Leave empty to use the LLM to generate the keywords.",
+    )
+
    conversation_history: Optional[List[Dict[str, Any]]] = Field(
        default=None,
        description="Stores past conversation history to maintain context. Format: [{'role': 'user/assistant', 'content': 'message'}].",
@ -357,6 +367,16 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60, rag
        }
        ```

+        Bypass initial LLM call by providing high-level and low-level keywords:
+        ```json
+        {
+            "query": "What is Retrieval-Augmented-Generation?",
+            "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
+            "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
+            "mode": "mix"
+        }
+        ```
+
        Advanced query with references:
        ```json
        {
@ -574,6 +594,16 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60, rag
        }
        ```

+        Bypass initial LLM call by providing high-level and low-level keywords:
+        ```json
+        {
+            "query": "What is Retrieval-Augmented-Generation?",
+            "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
+            "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
+            "mode": "mix"
+        }
+        ```
+
        Complete response query:
        ```json
        {
@ -1063,6 +1093,16 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60, rag
        }
        ```

+        Bypass initial LLM call by providing high-level and low-level keywords:
+        ```json
+        {
+            "query": "What is Retrieval-Augmented-Generation?",
+            "hl_keywords": ["machine learning", "information retrieval", "natural language processing"],
+            "ll_keywords": ["retrieval augmented generation", "RAG", "knowledge base"],
+            "mode": "mix"
+        }
+        ```
+
        **Response Analysis:**
        - **Empty arrays**: Normal for certain modes (e.g., naive mode has no entities/relationships)
        - **Processing info**: Shows retrieval statistics and token usage