Remove deprecated keyword extraction query methods
- Delete query_with_keywords function
- Remove kg_query_with_keywords helper
- Drop separate keyword extraction methods
parent f5ac6a9f4b
commit cf064579ce
2 changed files with 0 additions and 256 deletions
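The deprecation notes in the removed code name the replacement: carry the extra prompt in QueryParam.user_prompt and call the regular query entry point. A minimal sketch of the migration, assuming QueryParam exposes a user_prompt field as the TODO comments state; the helper name ask and the mode value are illustrative:

    from lightrag import LightRAG, QueryParam

    def ask(rag: LightRAG, question: str, extra_prompt: str) -> str:
        # Before this commit (now removed):
        # return rag.query_with_separate_keyword_extraction(
        #     question, prompt=extra_prompt, param=QueryParam(mode="hybrid")
        # )
        # Replacement: fold the extra prompt into QueryParam.user_prompt.
        return rag.query(
            question,
            param=QueryParam(mode="hybrid", user_prompt=extra_prompt),
        )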
@@ -72,7 +72,6 @@ from .operate import (
    merge_nodes_and_edges,
    kg_query,
    naive_query,
    query_with_keywords,
    _rebuild_knowledge_from_chunks,
)
from .constants import GRAPH_FIELD_SEP
@@ -1816,8 +1815,6 @@ class LightRAG:
        """
        # If a custom model is provided in param, temporarily update global config
        global_config = asdict(self)
        # Save original query for vector search
        param.original_query = query

        if param.mode in ["local", "global", "hybrid", "mix"]:
            response = await kg_query(
@@ -1859,59 +1856,6 @@ class LightRAG:
        await self._query_done()
        return response

    # TODO: Deprecated, use user_prompt in QueryParam instead
    def query_with_separate_keyword_extraction(
        self, query: str, prompt: str, param: QueryParam = QueryParam()
    ):
        """
        Query with separate keyword extraction step.

        This method extracts keywords from the query first, then uses them for the query.

        Args:
            query: User query
            prompt: Additional prompt for the query
            param: Query parameters

        Returns:
            Query response
        """
        loop = always_get_an_event_loop()
        return loop.run_until_complete(
            self.aquery_with_separate_keyword_extraction(query, prompt, param)
        )

    # TODO: Deprecated, use user_prompt in QueryParam instead
    async def aquery_with_separate_keyword_extraction(
        self, query: str, prompt: str, param: QueryParam = QueryParam()
    ) -> str | AsyncIterator[str]:
        """
        Async version of query_with_separate_keyword_extraction.

        Args:
            query: User query
            prompt: Additional prompt for the query
            param: Query parameters

        Returns:
            Query response or async iterator
        """
        response = await query_with_keywords(
            query=query,
            prompt=prompt,
            param=param,
            knowledge_graph_inst=self.chunk_entity_relation_graph,
            entities_vdb=self.entities_vdb,
            relationships_vdb=self.relationships_vdb,
            chunks_vdb=self.chunks_vdb,
            text_chunks_db=self.text_chunks,
            global_config=asdict(self),
            hashing_kv=self.llm_response_cache,
        )

        await self._query_done()
        return response

    async def _query_done(self):
        await self.llm_response_cache.index_done_callback()
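The removed synchronous wrapper follows the codebase's usual sync-over-async pattern: obtain an event loop and drive the async twin to completion. A self-contained sketch of that pattern; always_get_an_event_loop below is a simplified stand-in for the library helper of the same name:

    import asyncio

    def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
        # Simplified stand-in: reuse the current loop if one exists, else create one.
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    async def aquery_example(question: str) -> str:
        return f"answer to {question!r}"

    def query_example(question: str) -> str:
        # Synchronous facade over the async implementation, as in the removed method.
        loop = always_get_an_event_loop()
        return loop.run_until_complete(aquery_example(question))

    print(query_example("hello"))  # answer to 'hello'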
@@ -3187,203 +3187,3 @@ async def naive_query(
    )

    return response


# TODO: Deprecated, use user_prompt in QueryParam instead
async def kg_query_with_keywords(
    query: str,
    knowledge_graph_inst: BaseGraphStorage,
    entities_vdb: BaseVectorStorage,
    relationships_vdb: BaseVectorStorage,
    text_chunks_db: BaseKVStorage,
    query_param: QueryParam,
    global_config: dict[str, str],
    hashing_kv: BaseKVStorage | None = None,
    ll_keywords: list[str] = [],
    hl_keywords: list[str] = [],
    chunks_vdb: BaseVectorStorage | None = None,
) -> str | AsyncIterator[str]:
    """
    Refactored kg_query that does NOT extract keywords by itself.
    It expects hl_keywords and ll_keywords to be set in query_param, or defaults to empty.
    Then it uses those to build context and produce a final LLM response.
    """
    if query_param.model_func:
        use_model_func = query_param.model_func
    else:
        use_model_func = global_config["llm_model_func"]
        # Apply higher priority (5) to query relation LLM function
        use_model_func = partial(use_model_func, _priority=5)

    args_hash = compute_args_hash(query_param.mode, query)
    cached_response, quantized, min_val, max_val = await handle_cache(
        hashing_kv, args_hash, query, query_param.mode, cache_type="query"
    )
    if cached_response is not None:
        return cached_response
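The guard above hashes the query arguments into a cache key and short-circuits on a hit. A self-contained sketch of the same idea; the real compute_args_hash and handle_cache helpers live in the library, and this version substitutes a plain dict and SHA-256:

    import hashlib
    import json

    _cache: dict[str, str] = {}

    def compute_args_hash(*args) -> str:
        # Stable hash over the call arguments, used as a cache key.
        payload = json.dumps(args, sort_keys=True, default=str)
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

    def cached_answer(mode: str, query: str) -> str | None:
        return _cache.get(compute_args_hash(mode, query))

    def store_answer(mode: str, query: str, response: str) -> None:
        _cache[compute_args_hash(mode, query)] = response

    store_answer("hybrid", "who wrote it?", "the author")
    assert cached_answer("hybrid", "who wrote it?") == "the author"
    assert cached_answer("local", "who wrote it?") is None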
    # If neither has any keywords, you could handle that logic here.
    if not hl_keywords and not ll_keywords:
        logger.warning(
            "No keywords found in query_param. Could default to global mode or fail."
        )
        return PROMPTS["fail_response"]
    if not ll_keywords and query_param.mode in ["local", "hybrid"]:
        logger.warning("low_level_keywords is empty, switching to global mode.")
        query_param.mode = "global"
    if not hl_keywords and query_param.mode in ["global", "hybrid"]:
        logger.warning("high_level_keywords is empty, switching to local mode.")
        query_param.mode = "local"
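The fallback above silently degrades the query mode when one keyword level is missing. Restated as a pure function for clarity (a sketch, not library API):

    def resolve_mode(mode: str, hl_keywords: list[str], ll_keywords: list[str]) -> str | None:
        """Mirror the removed fallback logic; None means 'fail_response'."""
        if not hl_keywords and not ll_keywords:
            return None  # nothing to search on
        if not ll_keywords and mode in ("local", "hybrid"):
            return "global"  # no low-level keywords: skip entity-centric retrieval
        if not hl_keywords and mode in ("global", "hybrid"):
            return "local"  # no high-level keywords: skip thematic retrieval
        return mode

    assert resolve_mode("hybrid", [], ["Alice"]) == "local"
    assert resolve_mode("hybrid", ["themes"], []) == "global"
    assert resolve_mode("hybrid", [], []) is None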
    ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
    hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""

    context = await _build_query_context(
        query,
        ll_keywords_str,
        hl_keywords_str,
        knowledge_graph_inst,
        entities_vdb,
        relationships_vdb,
        text_chunks_db,
        query_param,
        chunks_vdb=chunks_vdb,
    )
    if not context:
        return PROMPTS["fail_response"]

    if query_param.only_need_context:
        return context
    # Process conversation history
    history_context = ""
    if query_param.conversation_history:
        history_context = get_conversation_turns(
            query_param.conversation_history, query_param.history_turns
        )

    sys_prompt_temp = PROMPTS["rag_response"]
    sys_prompt = sys_prompt_temp.format(
        context_data=context,
        response_type=query_param.response_type,
        history=history_context,
    )

    if query_param.only_need_prompt:
        return sys_prompt

    tokenizer: Tokenizer = global_config["tokenizer"]
    len_of_prompts = len(tokenizer.encode(query + sys_prompt))
    logger.debug(
        f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
    )
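get_conversation_turns above is presumably what trims the dialogue to the last history_turns exchanges before templating. A sketch under that assumption; the library helper may select or format turns differently:

    def get_conversation_turns(history: list[dict[str, str]], max_turns: int) -> str:
        # Assumed behavior: keep the last `max_turns` user/assistant pairs
        # and render them one message per line.
        recent = history[-2 * max_turns:]  # one turn == user message + assistant reply
        return "\n".join(f"{m['role']}: {m['content']}" for m in recent)

    history = [
        {"role": "user", "content": "Who is Alice?"},
        {"role": "assistant", "content": "A character in the corpus."},
        {"role": "user", "content": "Where does she live?"},
        {"role": "assistant", "content": "In Wonderland."},
    ]
    print(get_conversation_turns(history, max_turns=1))
    # user: Where does she live?
    # assistant: In Wonderland.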
    # 6. Generate response
    response = await use_model_func(
        query,
        system_prompt=sys_prompt,
        stream=query_param.stream,
    )

    # Clean up response content
    if isinstance(response, str) and len(response) > len(sys_prompt):
        response = (
            response.replace(sys_prompt, "")
            .replace("user", "")
            .replace("model", "")
            .replace(query, "")
            .replace("<system>", "")
            .replace("</system>", "")
            .strip()
        )
    if hashing_kv.global_config.get("enable_llm_cache"):
        await save_to_cache(
            hashing_kv,
            CacheData(
                args_hash=args_hash,
                content=response,
                prompt=query,
                mode=query_param.mode,
                cache_type="query",
            ),
        )

    return response
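The string-replace cleanup in the function above strips any echoed prompt scaffolding from a non-streaming reply. The same logic as a standalone helper, a direct restatement of the removed block rather than a library function; note that the blanket replacement of "user" and "model" also deletes legitimate occurrences of those words in the answer, a side effect the removed code accepted:

    def strip_prompt_echo(response: str, sys_prompt: str, query: str) -> str:
        # Some models echo the system prompt, role markers, or the question
        # back into the completion; remove those verbatim fragments.
        if len(response) <= len(sys_prompt):
            return response
        return (
            response.replace(sys_prompt, "")
            .replace("user", "")
            .replace("model", "")
            .replace(query, "")
            .replace("<system>", "")
            .replace("</system>", "")
            .strip()
        )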
# TODO: Deprecated, use user_prompt in QueryParam instead
async def query_with_keywords(
    query: str,
    prompt: str,
    param: QueryParam,
    knowledge_graph_inst: BaseGraphStorage,
    entities_vdb: BaseVectorStorage,
    relationships_vdb: BaseVectorStorage,
    chunks_vdb: BaseVectorStorage,
    text_chunks_db: BaseKVStorage,
    global_config: dict[str, str],
    hashing_kv: BaseKVStorage | None = None,
) -> str | AsyncIterator[str]:
    """
    Extract keywords from the query and then use them for retrieving information.

    1. Extracts high-level and low-level keywords from the query
    2. Formats the query with the extracted keywords and prompt
    3. Uses the appropriate query method based on param.mode

    Args:
        query: The user's query
        prompt: Additional prompt to prepend to the query
        param: Query parameters
        knowledge_graph_inst: Knowledge graph storage
        entities_vdb: Entities vector database
        relationships_vdb: Relationships vector database
        chunks_vdb: Document chunks vector database
        text_chunks_db: Text chunks storage
        global_config: Global configuration
        hashing_kv: Cache storage

    Returns:
        Query response or async iterator
    """
    # Extract keywords
    hl_keywords, ll_keywords = await get_keywords_from_query(
        query=query,
        query_param=param,
        global_config=global_config,
        hashing_kv=hashing_kv,
    )
    # Create a new string with the prompt and the keywords
    keywords_str = ", ".join(ll_keywords + hl_keywords)
    formatted_question = (
        f"{prompt}\n\n### Keywords\n\n{keywords_str}\n\n### Query\n\n{query}"
    )
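Concretely, for a sample query with keywords already extracted, the formatted question sent downstream looks like this (all values below are made up for illustration):

    prompt = "Answer in one sentence."
    ll_keywords = ["Alice", "Wonderland"]
    hl_keywords = ["character relationships"]
    query = "Where does Alice live?"

    keywords_str = ", ".join(ll_keywords + hl_keywords)
    formatted_question = (
        f"{prompt}\n\n### Keywords\n\n{keywords_str}\n\n### Query\n\n{query}"
    )
    print(formatted_question)
    # Answer in one sentence.
    #
    # ### Keywords
    #
    # Alice, Wonderland, character relationships
    #
    # ### Query
    #
    # Where does Alice live?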
    # Use appropriate query method based on mode
    if param.mode in ["local", "global", "hybrid", "mix"]:
        return await kg_query_with_keywords(
            formatted_question,
            knowledge_graph_inst,
            entities_vdb,
            relationships_vdb,
            text_chunks_db,
            param,
            global_config,
            hashing_kv=hashing_kv,
            hl_keywords=hl_keywords,
            ll_keywords=ll_keywords,
            chunks_vdb=chunks_vdb,
        )
    elif param.mode == "naive":
        return await naive_query(
            formatted_question,
            chunks_vdb,
            text_chunks_db,
            param,
            global_config,
            hashing_kv=hashing_kv,
        )
    else:
        raise ValueError(f"Unknown mode {param.mode}")
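The if/elif dispatch above routes the four graph modes to one handler and naive to another. An equivalent table-driven shape, sketched with stand-in handlers (nothing here is library API):

    import asyncio
    from typing import Awaitable, Callable

    async def kg_handler(question: str) -> str:
        return f"[kg] {question}"

    async def naive_handler(question: str) -> str:
        return f"[naive] {question}"

    # "local", "global", "hybrid", and "mix" all route to the KG path,
    # mirroring the removed if/elif chain.
    HANDLERS: dict[str, Callable[[str], Awaitable[str]]] = {
        mode: kg_handler for mode in ("local", "global", "hybrid", "mix")
    } | {"naive": naive_handler}

    async def dispatch(mode: str, question: str) -> str:
        try:
            handler = HANDLERS[mode]
        except KeyError:
            raise ValueError(f"Unknown mode {mode}") from None
        return await handler(question)

    print(asyncio.run(dispatch("naive", "Where does Alice live?")))  # [naive] Where does Alice live?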