diff --git a/lightrag/operate.py b/lightrag/operate.py
index 808c1c15..c4f04da7 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -21,7 +21,6 @@ from .utils import (
     handle_cache,
     save_to_cache,
     CacheData,
-    get_conversation_turns,
     use_llm_func_with_cache,
     update_chunk_cache_list,
     remove_think_tags,
@@ -2180,13 +2179,6 @@ async def kg_query(
     if context is None:
         return PROMPTS["fail_response"]
 
-    # Process conversation history
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
-
     # Build system prompt
     user_prompt = (
         query_param.user_prompt
@@ -2197,7 +2189,6 @@ async def kg_query(
     sys_prompt = sys_prompt_temp.format(
         context_data=context,
         response_type=query_param.response_type,
-        history=history_context,
         user_prompt=user_prompt,
     )
 
@@ -2213,8 +2204,9 @@ async def kg_query(
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
-        stream=query_param.stream,
+        history_messages=query_param.conversation_history,
         enable_cot=True,
+        stream=query_param.stream,
     )
     if isinstance(response, str) and len(response) > len(sys_prompt):
         response = (
@@ -2327,14 +2319,7 @@ async def extract_keywords_only(
 
     language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
 
-    # 3. Process conversation history
-    # history_context = ""
-    # if param.conversation_history:
-    #     history_context = get_conversation_turns(
-    #         param.conversation_history, param.history_turns
-    #     )
-
-    # 4. Build the keyword-extraction prompt
+    # 3. Build the keyword-extraction prompt
     kw_prompt = PROMPTS["keywords_extraction"].format(
         query=text,
         examples=examples,
@@ -2347,7 +2332,7 @@ async def extract_keywords_only(
         f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})"
     )
 
-    # 5. Call the LLM for keyword extraction
+    # 4. Call the LLM for keyword extraction
     if param.model_func:
         use_model_func = param.model_func
     else:
@@ -2357,7 +2342,7 @@ async def extract_keywords_only(
 
     result = await use_model_func(kw_prompt, keyword_extraction=True)
 
-    # 6. Parse out JSON from the LLM response
+    # 5. Parse out JSON from the LLM response
     result = remove_think_tags(result)
     try:
         keywords_data = json_repair.loads(result)
@@ -2372,7 +2357,7 @@ async def extract_keywords_only(
     hl_keywords = keywords_data.get("high_level_keywords", [])
     ll_keywords = keywords_data.get("low_level_keywords", [])
 
-    # 7. Cache only the processed keywords with cache type
+    # 6. Cache only the processed keywords with cache type
     if hl_keywords or ll_keywords:
         cache_data = {
             "high_level_keywords": hl_keywords,
@@ -3171,7 +3156,6 @@ async def _build_llm_context(
 
     # Create sample system prompt for overhead calculation
     sample_sys_prompt = sys_prompt_template.format(
-        history="",  # History not included in context length calculation
         context_data="",  # Empty for overhead calculation
         response_type=response_type,
         user_prompt=user_prompt,
@@ -3963,14 +3947,6 @@ async def naive_query(
         global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
     )
 
-    # Calculate conversation history tokens
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
-    history_tokens = len(tokenizer.encode(history_context)) if history_context else 0
-
     # Calculate system prompt template tokens (excluding content_data)
     user_prompt = query_param.user_prompt if query_param.user_prompt else ""
     response_type = (
@@ -3988,7 +3964,6 @@ async def naive_query(
     sample_sys_prompt = sys_prompt_template.format(
         content_data="",  # Empty for overhead calculation
         response_type=response_type,
-        history=history_context,
         user_prompt=user_prompt,
     )
     sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
@@ -4004,7 +3979,7 @@ async def naive_query(
     available_chunk_tokens = max_total_tokens - used_tokens
 
     logger.debug(
-        f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
    )
 
     # Process chunks using unified processing with dynamic token limit
@@ -4040,12 +4015,6 @@ async def naive_query(
 ```
 """
 
-    # Process conversation history
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
 
     # Build system prompt
     user_prompt = (
@@ -4057,7 +4026,6 @@ async def naive_query(
     sys_prompt = sys_prompt_temp.format(
         content_data=text_units_str,
         response_type=query_param.response_type,
-        history=history_context,
         user_prompt=user_prompt,
     )
 
@@ -4072,8 +4040,9 @@ async def naive_query(
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
-        stream=query_param.stream,
+        history_messages=query_param.conversation_history,
         enable_cot=True,
+        stream=query_param.stream,
     )
 
     if isinstance(response, str) and len(response) > len(sys_prompt):
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index a6347edb..94896fb5 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -176,32 +176,30 @@ You are a helpful assistant responding to user query about Knowledge Graph and D
 Generate a concise response based on Knowledge Base and follow Response Rules, considering both current query and the conversation history if provided. Summarize all information in the provided Knowledge Base, and incorporating general knowledge relevant to the Knowledge Base. Do not include information not provided by Knowledge Base.
 
----Conversation History---
-{history}
-
 ---Knowledge Graph and Document Chunks---
+
 {context_data}
 
 ---Response Guidelines---
 
-**1. Content & Adherence:**
-- Strictly adhere to the provided context from the Knowledge Base. Do not invent, assume, or include any information not present in the source data.
-- If the answer cannot be found in the provided context, state that you do not have enough information to answer.
-- Ensure the response maintains continuity with the conversation history.
+1. **Content & Adherence:**
+   - Strictly adhere to the provided context from the Knowledge Base. Do not invent, assume, or include any information not present in the source data.
+   - If the answer cannot be found in the provided context, state that you do not have enough information to answer.
+   - Ensure the response maintains continuity with the conversation history.
 
-**2. Formatting & Language:**
-- Format the response using markdown with appropriate section headings.
-- The response language must in the same language as the user's question.
-- Target format and length: {response_type}
+2. **Formatting & Language:**
+   - Format the response using markdown with appropriate section headings.
+   - The response language must in the same language as the user's question.
+   - Target format and length: {response_type}
 
-**3. Citations / References:**
-- At the end of the response, under a "References" section, each citation must clearly indicate its origin (KG or DC).
-- The maximum number of citations is 5, including both KG and DC.
-- Use the following formats for citations:
-  - For a Knowledge Graph Entity: `[KG] `
-  - For a Knowledge Graph Relationship: `[KG] - `
-  - For a Document Chunk: `[DC] `
+3. **Citations / References:**
+   - At the end of the response, under a "References" section, each citation must clearly indicate its origin (KG or DC).
+   - The maximum number of citations is 5, including both KG and DC.
+   - Use the following formats for citations:
+     - For a Knowledge Graph Entity: `[KG] `
+     - For a Knowledge Graph Relationship: `[KG] ~ `
+     - For a Document Chunk: `[DC] `
 
----USER CONTEXT---
+---User Context---
 - Additional user prompt: {user_prompt}
 
 ---Response---
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 0a5234ce..0fc0fb03 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1144,68 +1144,6 @@ def exists_func(obj, func_name: str) -> bool:
     return False
 
 
-def get_conversation_turns(
-    conversation_history: list[dict[str, Any]], num_turns: int
-) -> str:
-    """
-    Process conversation history to get the specified number of complete turns.
-
-    Args:
-        conversation_history: List of conversation messages in chronological order
-        num_turns: Number of complete turns to include
-
-    Returns:
-        Formatted string of the conversation history
-    """
-    # Check if num_turns is valid
-    if num_turns <= 0:
-        return ""
-
-    # Group messages into turns
-    turns: list[list[dict[str, Any]]] = []
-    messages: list[dict[str, Any]] = []
-
-    # First, filter out keyword extraction messages
-    for msg in conversation_history:
-        if msg["role"] == "assistant" and (
-            msg["content"].startswith('{ "high_level_keywords"')
-            or msg["content"].startswith("{'high_level_keywords'")
-        ):
-            continue
-        messages.append(msg)
-
-    # Then process messages in chronological order
-    i = 0
-    while i < len(messages) - 1:
-        msg1 = messages[i]
-        msg2 = messages[i + 1]
-
-        # Check if we have a user-assistant or assistant-user pair
-        if (msg1["role"] == "user" and msg2["role"] == "assistant") or (
-            msg1["role"] == "assistant" and msg2["role"] == "user"
-        ):
-            # Always put user message first in the turn
-            if msg1["role"] == "assistant":
-                turn = [msg2, msg1]  # user, assistant
-            else:
-                turn = [msg1, msg2]  # user, assistant
-            turns.append(turn)
-        i += 2
-
-    # Keep only the most recent num_turns
-    if len(turns) > num_turns:
-        turns = turns[-num_turns:]
-
-    # Format the turns into a string
-    formatted_turns: list[str] = []
-    for turn in turns:
-        formatted_turns.extend(
-            [f"user: {turn[0]['content']}", f"assistant: {turn[1]['content']}"]
-        )
-
-    return "\n".join(formatted_turns)
-
-
 def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
     """
     Ensure that there is always an event loop available.
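
Taken together, these changes stop flattening conversation history into the `{history}` slot of the system prompt (via the removed `get_conversation_turns`) and instead pass it to the model function through the new `history_messages` keyword argument. The sketch below is illustrative only: `example_model_func` and the OpenAI-style message layout are assumptions, not part of this diff, and a real deployment would use whichever LLM binding LightRAG is configured with.

```python
from typing import Any


async def example_model_func(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    **kwargs: Any,
) -> str:
    """Hypothetical model_func showing how history_messages might be consumed."""
    # Assemble an OpenAI-style chat payload: system prompt first, then the
    # prior conversation turns forwarded verbatim, then the current query.
    messages: list[dict[str, Any]] = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})
    # A real implementation would send `messages` to the configured LLM here.
    return f"(stub) would send {len(messages)} messages to the LLM"
```

The upside of this arrangement is that chat-native LLM APIs can apply their own turn formatting to the history, the prompt templates no longer need a `{history}` placeholder, and the token-budget accounting in `naive_query` and `_build_llm_context` no longer has to estimate history tokens separately.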