Refactor conversation history handling to use LLM native message format

• Remove get_conversation_turns utility
• Pass history_messages to LLM directly
• Clean up prompt template formatting
yangdx 2025-09-10 11:56:58 +08:00
parent e078ab7103
commit 2dd143c935
3 changed files with 26 additions and 121 deletions
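The "LLM native message format" named in the title is the OpenAI-style list of role/content dicts. A minimal sketch of what `query_param.conversation_history` is now expected to hold (the sample turns are invented for illustration):

```python
# OpenAI-style chat messages, now handed to the LLM verbatim as
# `history_messages` instead of being flattened into the system prompt.
conversation_history = [
    {"role": "user", "content": "Which entities relate to GraphRAG?"},
    {"role": "assistant", "content": "GraphRAG is linked to ..."},
    {"role": "user", "content": "Expand on the second relationship."},
]
```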

lightrag/operate.py

@@ -21,7 +21,6 @@ from .utils import (
     handle_cache,
     save_to_cache,
     CacheData,
-    get_conversation_turns,
     use_llm_func_with_cache,
     update_chunk_cache_list,
     remove_think_tags,
@@ -2180,13 +2179,6 @@ async def kg_query(
     if context is None:
         return PROMPTS["fail_response"]
 
-    # Process conversation history
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
-
     # Build system prompt
     user_prompt = (
         query_param.user_prompt
@@ -2197,7 +2189,6 @@ async def kg_query(
     sys_prompt = sys_prompt_temp.format(
         context_data=context,
         response_type=query_param.response_type,
-        history=history_context,
         user_prompt=user_prompt,
     )
@@ -2213,8 +2204,9 @@ async def kg_query(
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
-        stream=query_param.stream,
+        history_messages=query_param.conversation_history,
+        enable_cot=True,
+        stream=query_param.stream,
     )
     if isinstance(response, str) and len(response) > len(sys_prompt):
         response = (
@@ -2327,14 +2319,7 @@ async def extract_keywords_only(
     language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
 
-    # 3. Process conversation history
-    # history_context = ""
-    # if param.conversation_history:
-    #     history_context = get_conversation_turns(
-    #         param.conversation_history, param.history_turns
-    #     )
-
-    # 4. Build the keyword-extraction prompt
+    # 3. Build the keyword-extraction prompt
     kw_prompt = PROMPTS["keywords_extraction"].format(
         query=text,
         examples=examples,
@@ -2347,7 +2332,7 @@ async def extract_keywords_only(
         f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})"
     )
 
-    # 5. Call the LLM for keyword extraction
+    # 4. Call the LLM for keyword extraction
     if param.model_func:
         use_model_func = param.model_func
     else:
@@ -2357,7 +2342,7 @@ async def extract_keywords_only(
     result = await use_model_func(kw_prompt, keyword_extraction=True)
 
-    # 6. Parse out JSON from the LLM response
+    # 5. Parse out JSON from the LLM response
     result = remove_think_tags(result)
     try:
         keywords_data = json_repair.loads(result)
@@ -2372,7 +2357,7 @@ async def extract_keywords_only(
     hl_keywords = keywords_data.get("high_level_keywords", [])
     ll_keywords = keywords_data.get("low_level_keywords", [])
 
-    # 7. Cache only the processed keywords with cache type
+    # 6. Cache only the processed keywords with cache type
     if hl_keywords or ll_keywords:
         cache_data = {
             "high_level_keywords": hl_keywords,
@@ -3171,7 +3156,6 @@ async def _build_llm_context(
     # Create sample system prompt for overhead calculation
     sample_sys_prompt = sys_prompt_template.format(
-        history="",  # History not included in context length calculation
         context_data="",  # Empty for overhead calculation
         response_type=response_type,
         user_prompt=user_prompt,
@@ -3963,14 +3947,6 @@ async def naive_query(
         global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
     )
 
-    # Calculate conversation history tokens
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
-    history_tokens = len(tokenizer.encode(history_context)) if history_context else 0
-
     # Calculate system prompt template tokens (excluding content_data)
     user_prompt = query_param.user_prompt if query_param.user_prompt else ""
     response_type = (
@@ -3988,7 +3964,6 @@ async def naive_query(
     sample_sys_prompt = sys_prompt_template.format(
         content_data="",  # Empty for overhead calculation
         response_type=response_type,
-        history=history_context,
         user_prompt=user_prompt,
     )
     sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
@@ -4004,7 +3979,7 @@ async def naive_query(
     available_chunk_tokens = max_total_tokens - used_tokens
     logger.debug(
-        f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
     )
 
     # Process chunks using unified processing with dynamic token limit
@@ -4040,12 +4015,6 @@ async def naive_query(
     ```
     """
 
-    # Process conversation history
-    history_context = ""
-    if query_param.conversation_history:
-        history_context = get_conversation_turns(
-            query_param.conversation_history, query_param.history_turns
-        )
 
     # Build system prompt
     user_prompt = (
@@ -4057,7 +4026,6 @@ async def naive_query(
     sys_prompt = sys_prompt_temp.format(
         content_data=text_units_str,
         response_type=query_param.response_type,
-        history=history_context,
         user_prompt=user_prompt,
     )
@@ -4072,8 +4040,9 @@ async def naive_query(
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
-        stream=query_param.stream,
+        history_messages=query_param.conversation_history,
+        enable_cot=True,
+        stream=query_param.stream,
     )
     if isinstance(response, str) and len(response) > len(sys_prompt):
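Both call sites above now pass the raw history through as `history_messages`. The diff does not show how the model function consumes it; the sketch below illustrates one plausible binding, where everything beyond the keyword arguments visible in the diff is an assumption:

```python
from typing import Any


async def example_model_func(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    enable_cot: bool = False,
    stream: bool = False,
    **kwargs: Any,
) -> str:
    # Assemble the transcript in native chat order: system prompt first,
    # then prior turns verbatim, then the new user query.
    messages: list[dict[str, Any]] = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})
    # A real binding would forward `messages` (plus stream/enable_cot
    # handling) to a chat-completion client; we only echo the shape here.
    return f"assembled {len(messages)} messages for the provider"
```

The design point is that the provider-side binding, not the prompt template, now decides how prior turns are serialized and truncated, which is also why the history token bookkeeping disappears from `naive_query` above.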

lightrag/prompt.py

@@ -176,32 +176,30 @@ You are a helpful assistant responding to user query about Knowledge Graph and Document Chunks
 Generate a concise response based on Knowledge Base and follow Response Rules, considering both current query and the conversation history if provided. Summarize all information in the provided Knowledge Base, and incorporating general knowledge relevant to the Knowledge Base. Do not include information not provided by Knowledge Base.
 
----Conversation History---
-{history}
-
 ---Knowledge Graph and Document Chunks---
 {context_data}
 
 ---Response Guidelines---
-**1. Content & Adherence:**
-- Strictly adhere to the provided context from the Knowledge Base. Do not invent, assume, or include any information not present in the source data.
-- If the answer cannot be found in the provided context, state that you do not have enough information to answer.
-- Ensure the response maintains continuity with the conversation history.
+1. **Content & Adherence:**
+  - Strictly adhere to the provided context from the Knowledge Base. Do not invent, assume, or include any information not present in the source data.
+  - If the answer cannot be found in the provided context, state that you do not have enough information to answer.
+  - Ensure the response maintains continuity with the conversation history.
 
-**2. Formatting & Language:**
-- Format the response using markdown with appropriate section headings.
-- The response language must in the same language as the user's question.
-- Target format and length: {response_type}
+2. **Formatting & Language:**
+  - Format the response using markdown with appropriate section headings.
+  - The response language must in the same language as the user's question.
+  - Target format and length: {response_type}
 
-**3. Citations / References:**
-- At the end of the response, under a "References" section, each citation must clearly indicate its origin (KG or DC).
-- The maximum number of citations is 5, including both KG and DC.
-- Use the following formats for citations:
-  - For a Knowledge Graph Entity: `[KG] <entity_name>`
-  - For a Knowledge Graph Relationship: `[KG] <entity1_name> - <entity2_name>`
-  - For a Document Chunk: `[DC] <file_path_or_document_name>`
+3. **Citations / References:**
+  - At the end of the response, under a "References" section, each citation must clearly indicate its origin (KG or DC).
+  - The maximum number of citations is 5, including both KG and DC.
+  - Use the following formats for citations:
+    - For a Knowledge Graph Entity: `[KG] <entity_name>`
+    - For a Knowledge Graph Relationship: `[KG] <entity1_name> ~ <entity2_name>`
+    - For a Document Chunk: `[DC] <file_path_or_document_name>`
 
----USER CONTEXT---
+---User Context---
 - Additional user prompt: {user_prompt}
 
 ---Response---
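With `{history}` gone from the template, the `history=` kwarg disappears from each `.format(...)` call mainly because the `history_context` variable itself no longer exists; note that `str.format` silently ignores extra keyword arguments, whereas a placeholder left in the template with no matching argument raises `KeyError`. A toy check of that contract (template text abbreviated):

```python
# Extra kwargs are ignored; missing placeholders raise.
template = "---User Context---\n- Additional user prompt: {user_prompt}\n"
print(template.format(user_prompt="", history="ignored"))  # works fine

try:
    "{history}".format(user_prompt="")  # placeholder with no argument
except KeyError as e:
    print("missing placeholder:", e)
```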

lightrag/utils.py

@@ -1144,68 +1144,6 @@ def exists_func(obj, func_name: str) -> bool:
     return False
 
-def get_conversation_turns(
-    conversation_history: list[dict[str, Any]], num_turns: int
-) -> str:
-    """
-    Process conversation history to get the specified number of complete turns.
-
-    Args:
-        conversation_history: List of conversation messages in chronological order
-        num_turns: Number of complete turns to include
-
-    Returns:
-        Formatted string of the conversation history
-    """
-    # Check if num_turns is valid
-    if num_turns <= 0:
-        return ""
-
-    # Group messages into turns
-    turns: list[list[dict[str, Any]]] = []
-    messages: list[dict[str, Any]] = []
-
-    # First, filter out keyword extraction messages
-    for msg in conversation_history:
-        if msg["role"] == "assistant" and (
-            msg["content"].startswith('{ "high_level_keywords"')
-            or msg["content"].startswith("{'high_level_keywords'")
-        ):
-            continue
-        messages.append(msg)
-
-    # Then process messages in chronological order
-    i = 0
-    while i < len(messages) - 1:
-        msg1 = messages[i]
-        msg2 = messages[i + 1]
-
-        # Check if we have a user-assistant or assistant-user pair
-        if (msg1["role"] == "user" and msg2["role"] == "assistant") or (
-            msg1["role"] == "assistant" and msg2["role"] == "user"
-        ):
-            # Always put user message first in the turn
-            if msg1["role"] == "assistant":
-                turn = [msg2, msg1]  # user, assistant
-            else:
-                turn = [msg1, msg2]  # user, assistant
-            turns.append(turn)
-        i += 2
-
-    # Keep only the most recent num_turns
-    if len(turns) > num_turns:
-        turns = turns[-num_turns:]
-
-    # Format the turns into a string
-    formatted_turns: list[str] = []
-    for turn in turns:
-        formatted_turns.extend(
-            [f"user: {turn[0]['content']}", f"assistant: {turn[1]['content']}"]
-        )
-
-    return "\n".join(formatted_turns)
-
 def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
     """
     Ensure that there is always an event loop available.
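Callers that still want the flattened `user:`/`assistant:` text the deleted helper produced can approximate it inline; the sketch below keeps the output format but drops the helper's turn pairing and keyword-extraction filtering (the function name is ours, not part of the codebase):

```python
from typing import Any


def flatten_history(history: list[dict[str, Any]], num_turns: int) -> str:
    """Rough stand-in for the removed get_conversation_turns."""
    if num_turns <= 0:
        return ""
    msgs = [m for m in history if m["role"] in ("user", "assistant")]
    # Two messages per turn; keep only the most recent turns.
    msgs = msgs[-2 * num_turns :]
    return "\n".join(f"{m['role']}: {m['content']}" for m in msgs)
```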