From cf064579ce99f323c7c6a95fc6ff1299cd12d8ed Mon Sep 17 00:00:00 2001
From: yangdx
Date: Fri, 8 Aug 2025 14:59:39 +0800
Subject: [PATCH] Remove deprecated keyword extraction query methods

- Delete the query_with_keywords function from lightrag/operate.py
- Remove the kg_query_with_keywords helper it depended on
- Drop the deprecated query_with_separate_keyword_extraction and
  aquery_with_separate_keyword_extraction wrappers from LightRAG
- Stop saving param.original_query before dispatching the query
---
 lightrag/lightrag.py |  56 ------------
 lightrag/operate.py  | 200 -------------------------------------------
 2 files changed, 256 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 467265f0..b35caab2 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -72,7 +72,6 @@ from .operate import (
     merge_nodes_and_edges,
     kg_query,
     naive_query,
-    query_with_keywords,
     _rebuild_knowledge_from_chunks,
 )
 from .constants import GRAPH_FIELD_SEP
@@ -1816,8 +1815,6 @@ class LightRAG:
         """
         # If a custom model is provided in param, temporarily update global config
         global_config = asdict(self)
-        # Save original query for vector search
-        param.original_query = query
 
         if param.mode in ["local", "global", "hybrid", "mix"]:
             response = await kg_query(
@@ -1859,59 +1856,6 @@ class LightRAG:
             await self._query_done()
         return response
 
-    # TODO: Deprecated, use user_prompt in QueryParam instead
-    def query_with_separate_keyword_extraction(
-        self, query: str, prompt: str, param: QueryParam = QueryParam()
-    ):
-        """
-        Query with separate keyword extraction step.
-
-        This method extracts keywords from the query first, then uses them for the query.
-
-        Args:
-            query: User query
-            prompt: Additional prompt for the query
-            param: Query parameters
-
-        Returns:
-            Query response
-        """
-        loop = always_get_an_event_loop()
-        return loop.run_until_complete(
-            self.aquery_with_separate_keyword_extraction(query, prompt, param)
-        )
-
-    # TODO: Deprecated, use user_prompt in QueryParam instead
-    async def aquery_with_separate_keyword_extraction(
-        self, query: str, prompt: str, param: QueryParam = QueryParam()
-    ) -> str | AsyncIterator[str]:
-        """
-        Async version of query_with_separate_keyword_extraction.
-
-        Args:
-            query: User query
-            prompt: Additional prompt for the query
-            param: Query parameters
-
-        Returns:
-            Query response or async iterator
-        """
-        response = await query_with_keywords(
-            query=query,
-            prompt=prompt,
-            param=param,
-            knowledge_graph_inst=self.chunk_entity_relation_graph,
-            entities_vdb=self.entities_vdb,
-            relationships_vdb=self.relationships_vdb,
-            chunks_vdb=self.chunks_vdb,
-            text_chunks_db=self.text_chunks,
-            global_config=asdict(self),
-            hashing_kv=self.llm_response_cache,
-        )
-
-        await self._query_done()
-        return response
-
     async def _query_done(self):
         await self.llm_response_cache.index_done_callback()
 
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 99a3d072..7725caca 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -3187,203 +3187,3 @@ async def naive_query(
     )
 
     return response
-
-
-# TODO: Deprecated, use user_prompt in QueryParam instead
-async def kg_query_with_keywords(
-    query: str,
-    knowledge_graph_inst: BaseGraphStorage,
-    entities_vdb: BaseVectorStorage,
-    relationships_vdb: BaseVectorStorage,
-    text_chunks_db: BaseKVStorage,
-    query_param: QueryParam,
-    global_config: dict[str, str],
-    hashing_kv: BaseKVStorage | None = None,
-    ll_keywords: list[str] = [],
-    hl_keywords: list[str] = [],
-    chunks_vdb: BaseVectorStorage | None = None,
-) -> str | AsyncIterator[str]:
-    """
-    Refactored kg_query that does NOT extract keywords by itself.
-    It expects hl_keywords and ll_keywords to be set in query_param, or defaults to empty.
-    Then it uses those to build context and produce a final LLM response.
-    """
-    if query_param.model_func:
-        use_model_func = query_param.model_func
-    else:
-        use_model_func = global_config["llm_model_func"]
-        # Apply higher priority (5) to query relation LLM function
-        use_model_func = partial(use_model_func, _priority=5)
-
-    args_hash = compute_args_hash(query_param.mode, query)
-    cached_response, quantized, min_val, max_val = await handle_cache(
-        hashing_kv, args_hash, query, query_param.mode, cache_type="query"
-    )
-    if cached_response is not None:
-        return cached_response
-
-    # If neither has any keywords, you could handle that logic here.
-    if not hl_keywords and not ll_keywords:
-        logger.warning(
-            "No keywords found in query_param. Could default to global mode or fail."
-        )
-        return PROMPTS["fail_response"]
-    if not ll_keywords and query_param.mode in ["local", "hybrid"]:
-        logger.warning("low_level_keywords is empty, switching to global mode.")
-        query_param.mode = "global"
-    if not hl_keywords and query_param.mode in ["global", "hybrid"]:
-        logger.warning("high_level_keywords is empty, switching to local mode.")
-        query_param.mode = "local"
-
-    ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
-    hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
-
-    context = await _build_query_context(
-        query,
-        ll_keywords_str,
-        hl_keywords_str,
-        knowledge_graph_inst,
-        entities_vdb,
-        relationships_vdb,
-        text_chunks_db,
-        query_param,
-        chunks_vdb=chunks_vdb,
-    )
-    if not context:
-        return PROMPTS["fail_response"]
-
-    if query_param.only_need_context:
-        return context
-
-    # Process conversation history
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
-
-    sys_prompt_temp = PROMPTS["rag_response"]
-    sys_prompt = sys_prompt_temp.format(
-        context_data=context,
-        response_type=query_param.response_type,
-        history=history_context,
-    )
-
-    if query_param.only_need_prompt:
-        return sys_prompt
-
-    tokenizer: Tokenizer = global_config["tokenizer"]
-    len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(
-        f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
-    )
-
-    # 6. Generate response
-    response = await use_model_func(
-        query,
-        system_prompt=sys_prompt,
-        stream=query_param.stream,
-    )
-
-    # Clean up response content
-    if isinstance(response, str) and len(response) > len(sys_prompt):
-        response = (
-            response.replace(sys_prompt, "")
-            .replace("user", "")
-            .replace("model", "")
-            .replace(query, "")
-            .replace("<system>", "")
-            .replace("</system>", "")
-            .strip()
-        )
-
-    if hashing_kv.global_config.get("enable_llm_cache"):
-        await save_to_cache(
-            hashing_kv,
-            CacheData(
-                args_hash=args_hash,
-                content=response,
-                prompt=query,
-                mode=query_param.mode,
-                cache_type="query",
-            ),
-        )
-
-    return response
-
-
-# TODO: Deprecated, use user_prompt in QueryParam instead
-async def query_with_keywords(
-    query: str,
-    prompt: str,
-    param: QueryParam,
-    knowledge_graph_inst: BaseGraphStorage,
-    entities_vdb: BaseVectorStorage,
-    relationships_vdb: BaseVectorStorage,
-    chunks_vdb: BaseVectorStorage,
-    text_chunks_db: BaseKVStorage,
-    global_config: dict[str, str],
-    hashing_kv: BaseKVStorage | None = None,
-) -> str | AsyncIterator[str]:
-    """
-    Extract keywords from the query and then use them for retrieving information.
-
-    1. Extracts high-level and low-level keywords from the query
-    2. Formats the query with the extracted keywords and prompt
-    3. Uses the appropriate query method based on param.mode
-
-    Args:
-        query: The user's query
-        prompt: Additional prompt to prepend to the query
-        param: Query parameters
-        knowledge_graph_inst: Knowledge graph storage
-        entities_vdb: Entities vector database
-        relationships_vdb: Relationships vector database
-        chunks_vdb: Document chunks vector database
-        text_chunks_db: Text chunks storage
-        global_config: Global configuration
-        hashing_kv: Cache storage
-
-    Returns:
-        Query response or async iterator
-    """
-    # Extract keywords
-    hl_keywords, ll_keywords = await get_keywords_from_query(
-        query=query,
-        query_param=param,
-        global_config=global_config,
-        hashing_kv=hashing_kv,
-    )
-
-    # Create a new string with the prompt and the keywords
-    keywords_str = ", ".join(ll_keywords + hl_keywords)
-    formatted_question = (
-        f"{prompt}\n\n### Keywords\n\n{keywords_str}\n\n### Query\n\n{query}"
-    )
-
-    # Use appropriate query method based on mode
-    if param.mode in ["local", "global", "hybrid", "mix"]:
-        return await kg_query_with_keywords(
-            formatted_question,
-            knowledge_graph_inst,
-            entities_vdb,
-            relationships_vdb,
-            text_chunks_db,
-            param,
-            global_config,
-            hashing_kv=hashing_kv,
-            hl_keywords=hl_keywords,
-            ll_keywords=ll_keywords,
-            chunks_vdb=chunks_vdb,
-        )
-    elif param.mode == "naive":
-        return await naive_query(
-            formatted_question,
-            chunks_vdb,
-            text_chunks_db,
-            param,
-            global_config,
-            hashing_kv=hashing_kv,
-        )
-    else:
-        raise ValueError(f"Unknown mode {param.mode}")
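
Migration note for callers of the removed APIs: as the TODO comments in the
deleted code indicate, the extra instructions formerly passed through the
prompt argument of query_with_separate_keyword_extraction should now travel
in QueryParam.user_prompt, and keyword extraction happens inside kg_query
for the local/global/hybrid/mix modes. A minimal sketch, not a verbatim
recipe; the rag instance is hypothetical and its storage and LLM
configuration are assumed to be set up elsewhere:

    from lightrag import LightRAG, QueryParam

    # Hypothetical, pre-configured instance; a real setup also supplies
    # llm_model_func and embedding_func at construction time.
    rag = LightRAG(working_dir="./rag_storage")

    # Before (removed by this patch):
    #   rag.query_with_separate_keyword_extraction(
    #       "How are entities linked to chunks?",
    #       prompt="Answer in one paragraph.",
    #       param=QueryParam(mode="hybrid"),
    #   )

    # After: user_prompt carries the extra instructions, and kg_query
    # extracts high- and low-level keywords internally.
    param = QueryParam(mode="hybrid", user_prompt="Answer in one paragraph.")
    response = rag.query("How are entities linked to chunks?", param=param)

Because the keywords no longer need to be spliced into the query text, the
formatted "### Keywords / ### Query" prompt built by query_with_keywords
disappears along with the duplicated kg_query_with_keywords pipeline.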