From cf064579ce99f323c7c6a95fc6ff1299cd12d8ed Mon Sep 17 00:00:00 2001
From: yangdx
Date: Fri, 8 Aug 2025 14:59:39 +0800
Subject: [PATCH] Remove deprecated keyword extraction query methods

- Delete the query_with_keywords function from lightrag/operate.py
- Remove the kg_query_with_keywords helper it depended on
- Drop the deprecated query_with_separate_keyword_extraction and
  aquery_with_separate_keyword_extraction wrappers from LightRAG
- Stop saving param.original_query before dispatching the query
---
 lightrag/lightrag.py |  56 ------------
 lightrag/operate.py  | 200 -------------------------------------------
 2 files changed, 256 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 467265f0..b35caab2 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -72,7 +72,6 @@ from .operate import (
     merge_nodes_and_edges,
     kg_query,
     naive_query,
-    query_with_keywords,
     _rebuild_knowledge_from_chunks,
 )
 from .constants import GRAPH_FIELD_SEP
@@ -1816,8 +1815,6 @@ class LightRAG:
         """
         # If a custom model is provided in param, temporarily update global config
         global_config = asdict(self)
-        # Save original query for vector search
-        param.original_query = query
 
         if param.mode in ["local", "global", "hybrid", "mix"]:
             response = await kg_query(
@@ -1859,59 +1856,6 @@ class LightRAG:
             await self._query_done()
         return response
 
-    # TODO: Deprecated, use user_prompt in QueryParam instead
-    def query_with_separate_keyword_extraction(
-        self, query: str, prompt: str, param: QueryParam = QueryParam()
-    ):
-        """
-        Query with separate keyword extraction step.
-
-        This method extracts keywords from the query first, then uses them for the query.
-
-        Args:
-            query: User query
-            prompt: Additional prompt for the query
-            param: Query parameters
-
-        Returns:
-            Query response
-        """
-        loop = always_get_an_event_loop()
-        return loop.run_until_complete(
-            self.aquery_with_separate_keyword_extraction(query, prompt, param)
-        )
-
-    # TODO: Deprecated, use user_prompt in QueryParam instead
-    async def aquery_with_separate_keyword_extraction(
-        self, query: str, prompt: str, param: QueryParam = QueryParam()
-    ) -> str | AsyncIterator[str]:
-        """
-        Async version of query_with_separate_keyword_extraction.
-
-        Args:
-            query: User query
-            prompt: Additional prompt for the query
-            param: Query parameters
-
-        Returns:
-            Query response or async iterator
-        """
-        response = await query_with_keywords(
-            query=query,
-            prompt=prompt,
-            param=param,
-            knowledge_graph_inst=self.chunk_entity_relation_graph,
-            entities_vdb=self.entities_vdb,
-            relationships_vdb=self.relationships_vdb,
-            chunks_vdb=self.chunks_vdb,
-            text_chunks_db=self.text_chunks,
-            global_config=asdict(self),
-            hashing_kv=self.llm_response_cache,
-        )
-
-        await self._query_done()
-        return response
-
     async def _query_done(self):
         await self.llm_response_cache.index_done_callback()
 
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 99a3d072..7725caca 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -3187,203 +3187,3 @@ async def naive_query(
     )
 
     return response
-
-
-# TODO: Deprecated, use user_prompt in QueryParam instead
-async def kg_query_with_keywords(
-    query: str,
-    knowledge_graph_inst: BaseGraphStorage,
-    entities_vdb: BaseVectorStorage,
-    relationships_vdb: BaseVectorStorage,
-    text_chunks_db: BaseKVStorage,
-    query_param: QueryParam,
-    global_config: dict[str, str],
-    hashing_kv: BaseKVStorage | None = None,
-    ll_keywords: list[str] = [],
-    hl_keywords: list[str] = [],
-    chunks_vdb: BaseVectorStorage | None = None,
-) -> str | AsyncIterator[str]:
-    """
-    Refactored kg_query that does NOT extract keywords by itself.
-    It expects hl_keywords and ll_keywords to be set in query_param, or defaults to empty.
-    Then it uses those to build context and produce a final LLM response.
-    """
-    if query_param.model_func:
-        use_model_func = query_param.model_func
-    else:
-        use_model_func = global_config["llm_model_func"]
-        # Apply higher priority (5) to query relation LLM function
-        use_model_func = partial(use_model_func, _priority=5)
-
-    args_hash = compute_args_hash(query_param.mode, query)
-    cached_response, quantized, min_val, max_val = await handle_cache(
-        hashing_kv, args_hash, query, query_param.mode, cache_type="query"
-    )
-    if cached_response is not None:
-        return cached_response
-
-    # If neither has any keywords, you could handle that logic here.
-    if not hl_keywords and not ll_keywords:
-        logger.warning(
-            "No keywords found in query_param. Could default to global mode or fail."
-        )
-        return PROMPTS["fail_response"]
-    if not ll_keywords and query_param.mode in ["local", "hybrid"]:
-        logger.warning("low_level_keywords is empty, switching to global mode.")
-        query_param.mode = "global"
-    if not hl_keywords and query_param.mode in ["global", "hybrid"]:
-        logger.warning("high_level_keywords is empty, switching to local mode.")
-        query_param.mode = "local"
-
-    ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
-    hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
-
-    context = await _build_query_context(
-        query,
-        ll_keywords_str,
-        hl_keywords_str,
-        knowledge_graph_inst,
-        entities_vdb,
-        relationships_vdb,
-        text_chunks_db,
-        query_param,
-        chunks_vdb=chunks_vdb,
-    )
-    if not context:
-        return PROMPTS["fail_response"]
-
-    if query_param.only_need_context:
-        return context
-
-    # Process conversation history
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
-
-    sys_prompt_temp = PROMPTS["rag_response"]
-    sys_prompt = sys_prompt_temp.format(
-        context_data=context,
-        response_type=query_param.response_type,
-        history=history_context,
-    )
-
-    if query_param.only_need_prompt:
-        return sys_prompt
-
-    tokenizer: Tokenizer = global_config["tokenizer"]
-    len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(
-        f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
-    )
-
-    # 6. Generate response
-    response = await use_model_func(
-        query,
-        system_prompt=sys_prompt,
-        stream=query_param.stream,
-    )
-
-    # Clean up response content
-    if isinstance(response, str) and len(response) > len(sys_prompt):
-        response = (
-            response.replace(sys_prompt, "")
-            .replace("user", "")
-            .replace("model", "")
-            .replace(query, "")
-            .replace("<system>", "")
-            .replace("</system>", "")
-            .strip()
-        )
-
-    if hashing_kv.global_config.get("enable_llm_cache"):
-        await save_to_cache(
-            hashing_kv,
-            CacheData(
-                args_hash=args_hash,
-                content=response,
-                prompt=query,
-                mode=query_param.mode,
-                cache_type="query",
-            ),
-        )
-
-    return response
-
-
-# TODO: Deprecated, use user_prompt in QueryParam instead
-async def query_with_keywords(
-    query: str,
-    prompt: str,
-    param: QueryParam,
-    knowledge_graph_inst: BaseGraphStorage,
-    entities_vdb: BaseVectorStorage,
-    relationships_vdb: BaseVectorStorage,
-    chunks_vdb: BaseVectorStorage,
-    text_chunks_db: BaseKVStorage,
-    global_config: dict[str, str],
-    hashing_kv: BaseKVStorage | None = None,
-) -> str | AsyncIterator[str]:
-    """
-    Extract keywords from the query and then use them for retrieving information.
-
-    1. Extracts high-level and low-level keywords from the query
-    2. Formats the query with the extracted keywords and prompt
-    3. Uses the appropriate query method based on param.mode
-
-    Args:
-        query: The user's query
-        prompt: Additional prompt to prepend to the query
-        param: Query parameters
-        knowledge_graph_inst: Knowledge graph storage
-        entities_vdb: Entities vector database
-        relationships_vdb: Relationships vector database
-        chunks_vdb: Document chunks vector database
-        text_chunks_db: Text chunks storage
-        global_config: Global configuration
-        hashing_kv: Cache storage
-
-    Returns:
-        Query response or async iterator
-    """
-    # Extract keywords
-    hl_keywords, ll_keywords = await get_keywords_from_query(
-        query=query,
-        query_param=param,
-        global_config=global_config,
-        hashing_kv=hashing_kv,
-    )
-
-    # Create a new string with the prompt and the keywords
-    keywords_str = ", ".join(ll_keywords + hl_keywords)
-    formatted_question = (
-        f"{prompt}\n\n### Keywords\n\n{keywords_str}\n\n### Query\n\n{query}"
-    )
-
-    # Use appropriate query method based on mode
-    if param.mode in ["local", "global", "hybrid", "mix"]:
-        return await kg_query_with_keywords(
-            formatted_question,
-            knowledge_graph_inst,
-            entities_vdb,
-            relationships_vdb,
-            text_chunks_db,
-            param,
-            global_config,
-            hashing_kv=hashing_kv,
-            hl_keywords=hl_keywords,
-            ll_keywords=ll_keywords,
-            chunks_vdb=chunks_vdb,
-        )
-    elif param.mode == "naive":
-        return await naive_query(
-            formatted_question,
-            chunks_vdb,
-            text_chunks_db,
-            param,
-            global_config,
-            hashing_kv=hashing_kv,
-        )
-    else:
-        raise ValueError(f"Unknown mode {param.mode}")
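
Migration note for callers of the removed APIs: as the TODO comments in the
deleted code indicate, the extra instructions formerly passed through the
prompt argument of query_with_separate_keyword_extraction should now travel
in QueryParam.user_prompt, and keyword extraction happens inside kg_query
for the local/global/hybrid/mix modes. A minimal sketch, not a verbatim
recipe; the rag instance is hypothetical and its storage and LLM
configuration are assumed to be set up elsewhere:

    from lightrag import LightRAG, QueryParam

    # Hypothetical, pre-configured instance; a real setup also supplies
    # llm_model_func and embedding_func at construction time.
    rag = LightRAG(working_dir="./rag_storage")

    # Before (removed by this patch):
    #   rag.query_with_separate_keyword_extraction(
    #       "How are entities linked to chunks?",
    #       prompt="Answer in one paragraph.",
    #       param=QueryParam(mode="hybrid"),
    #   )

    # After: user_prompt carries the extra instructions, and kg_query
    # extracts high- and low-level keywords internally.
    param = QueryParam(mode="hybrid", user_prompt="Answer in one paragraph.")
    response = rag.query("How are entities linked to chunks?", param=param)

Because the keywords no longer need to be spliced into the query text, the
formatted "### Keywords / ### Query" prompt built by query_with_keywords
disappears along with the duplicated kg_query_with_keywords pipeline.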