Refactor prompts and context building for better maintainability

- Extract context templates to PROMPTS
- Unify token calculation logic
- Simplify user_prompt formatting
- Reduce code duplication
- Improve prompt structure consistency
2025-09-26 12:39:06 +08:00 · 2025-09-26 12:39:06 +08:00 · cbdc4c4bdf
commit cbdc4c4bdf
parent 1a0dc94f55
2 changed files with 165 additions and 190 deletions
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@ -2389,14 +2389,19 @@ async def kg_query(
            content=context_result.context, raw_data=context_result.raw_data
        )
    user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
    response_type = (
        query_param.response_type
        if query_param.response_type
        else "Multiple Paragraphs"
    )
    # Build system prompt
    sys_prompt_temp = system_prompt if system_prompt else PROMPTS["rag_response"]
    sys_prompt = sys_prompt_temp.format(
-        user_prompt=f"```\n{query_param.user_prompt}\n```"
+        response_type=response_type,
-        if query_param.user_prompt
+        user_prompt=user_prompt,
        else "n/a",
        context_data=context_result.context,
        response_type=query_param.response_type,
    )
    user_query = query
@ -3152,108 +3157,78 @@ async def _build_llm_context(
        global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
    )
    # Get the system prompt template from PROMPTS or global_config
    sys_prompt_template = global_config.get(
        "system_prompt_template", PROMPTS["rag_response"]
    )
    kg_context_template = PROMPTS["kg_query_context"]
    user_prompt = query_param.user_prompt if query_param.user_prompt else ""
    response_type = (
        query_param.response_type
        if query_param.response_type
        else "Multiple Paragraphs"
    )
    entities_str = "\n".join(
        json.dumps(entity, ensure_ascii=False) for entity in entities_context
    )
    relations_str = "\n".join(
        json.dumps(relation, ensure_ascii=False) for relation in relations_context
    )
    # Calculate preliminary kg context tokens
    pre_kg_context = kg_context_template.format(
        entities_str=entities_str,
        relations_str=relations_str,
        text_chunks_str="",
        reference_list_str="",
    )
    kg_context_tokens = len(tokenizer.encode(pre_kg_context))
    # Calculate preliminary system prompt tokens
    pre_sys_prompt = sys_prompt_template.format(
        context_data="",  # Empty for overhead calculation
        response_type=response_type,
        user_prompt=user_prompt,
    )
    sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
    # Calculate available tokens for text chunks
    query_tokens = len(tokenizer.encode(query))
    buffer_tokens = 200  # reserved for reference list and safety buffer
    available_chunk_tokens = max_total_tokens - (
        sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
    )
    logger.debug(
        f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
    )
    # Apply token truncation to chunks using the dynamic limit
    truncated_chunks = await process_chunks_unified(
        query=query,
        unique_chunks=merged_chunks,
        query_param=query_param,
        global_config=global_config,
        source_type=query_param.mode,
        chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
    )
    # Generate reference list from truncated chunks using the new common function
    reference_list, truncated_chunks = generate_reference_list_from_chunks(
        truncated_chunks
    )
    # Rebuild text_units_context with truncated chunks
    # The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
    text_units_context = []
-    truncated_chunks = []
+    for i, chunk in enumerate(truncated_chunks):
-
+        text_units_context.append(
-    if merged_chunks:
+            {
-        # Calculate dynamic token limit for text chunks
+                "reference_id": chunk["reference_id"],
-        entities_str = "\n".join(
+                "content": chunk["content"],
-            json.dumps(entity, ensure_ascii=False) for entity in entities_context
+            }
        )
        relations_str = "\n".join(
            json.dumps(relation, ensure_ascii=False) for relation in relations_context
        )
        # Calculate base context tokens (entities + relations + template)
        kg_context_template = """-----Entities(KG)-----
 ```json
 {entities_str}
 ```
 -----Relationships(KG)-----
 ```json
 {relations_str}
 ```
 -----Document Chunks(DC)-----
 ```json
 ```
 -----Refrence Document List-----
 The reference documents list in Document Chunks(DC) is as follows (reference_id in square brackets):
 """
        kg_context = kg_context_template.format(
            entities_str=entities_str, relations_str=relations_str
        )
        kg_context_tokens = len(tokenizer.encode(kg_context))
        # Calculate system prompt template overhead
        user_prompt = query_param.user_prompt if query_param.user_prompt else ""
        response_type = (
            query_param.response_type
            if query_param.response_type
            else "Multiple Paragraphs"
        )
        # Get the system prompt template from PROMPTS or global_config
        sys_prompt_template = global_config.get(
            "system_prompt_template", PROMPTS["rag_response"]
        )
        # Create sample system prompt for overhead calculation
        sample_sys_prompt = sys_prompt_template.format(
            context_data="",  # Empty for overhead calculation
            response_type=response_type,
            user_prompt=user_prompt,
        )
        sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
        # Total system prompt overhead = template + query tokens
        query_tokens = len(tokenizer.encode(query))
        sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
        buffer_tokens = 100  # Safety buffer as requested
        # Calculate available tokens for text chunks
        used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
        available_chunk_tokens = max_total_tokens - used_tokens
        logger.debug(
            f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
        )
        # Apply token truncation to chunks using the dynamic limit
        truncated_chunks = await process_chunks_unified(
            query=query,
            unique_chunks=merged_chunks,
            query_param=query_param,
            global_config=global_config,
            source_type=query_param.mode,
            chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
        )
        # Generate reference list from truncated chunks using the new common function
        reference_list, truncated_chunks = generate_reference_list_from_chunks(
            truncated_chunks
        )
        # Rebuild text_units_context with truncated chunks
        # The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
        for i, chunk in enumerate(truncated_chunks):
            text_units_context.append(
                {
                    "reference_id": chunk["reference_id"],
                    "content": chunk["content"],
                }
            )
        logger.debug(
            f"Final chunk processing: {len(merged_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
        )
    logger.info(
@ -3292,12 +3267,6 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
        if chunk_tracking_log:
            logger.info(f"chunks S+F/O: {' '.join(chunk_tracking_log)}")
    entities_str = "\n".join(
        json.dumps(entity, ensure_ascii=False) for entity in entities_context
    )
    relations_str = "\n".join(
        json.dumps(relation, ensure_ascii=False) for relation in relations_context
    )
    text_units_str = "\n".join(
        json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
    )
@ -3307,31 +3276,12 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
        if ref["reference_id"]
    )
-    result = f"""-----Entities(KG)-----
+    result = kg_context_template.format(
-
+        entities_str=entities_str,
-```json
+        relations_str=relations_str,
-{entities_str}
+        text_chunks_str=text_units_str,
-```
+        reference_list_str=reference_list_str,
-
+    )
 -----Relationships(KG)-----
 ```json
 {relations_str}
 ```
 -----Document Chunks(DC)-----
 ```json
 {text_units_str}
 ```
 -----Refrence Document List-----
 Document Chunks (DC) reference documents : (Each entry begins with [reference_id])
 {reference_list_str}
 """
    # Always return both context and complete data structure (unified approach)
    logger.debug(
@ -3416,11 +3366,7 @@ async def _build_query_context(
        query_embedding=search_result["query_embedding"],
    )
-    if (
+    if not merged_chunks:
        not merged_chunks
        and not truncation_result["entities_context"]
        and not truncation_result["relations_context"]
    ):
        return None
    # Stage 4: Build final LLM context with dynamic token processing
@ -4156,7 +4102,7 @@ async def naive_query(
    )
    # Calculate system prompt template tokens (excluding content_data)
-    user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+    user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
    response_type = (
        query_param.response_type
        if query_param.response_type
@ -4168,26 +4114,23 @@ async def naive_query(
        system_prompt if system_prompt else PROMPTS["naive_rag_response"]
    )
-    # Create a sample system prompt with empty content_data to calculate overhead
+    # Create a preliminary system prompt with empty content_data to calculate overhead
-    sample_sys_prompt = sys_prompt_template.format(
+    pre_sys_prompt = sys_prompt_template.format(
        content_data="",  # Empty for overhead calculation
        response_type=response_type,
        user_prompt=user_prompt,
        content_data="",  # Empty for overhead calculation
    )
    sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
    # Total system prompt overhead = template + query tokens
    query_tokens = len(tokenizer.encode(query))
    sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
    buffer_tokens = 100  # Safety buffer
    # Calculate available tokens for chunks
-    used_tokens = sys_prompt_overhead + buffer_tokens
+    sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
-    available_chunk_tokens = max_total_tokens - used_tokens
+    query_tokens = len(tokenizer.encode(query))
    buffer_tokens = 200  # reserved for reference list and safety buffer
    available_chunk_tokens = max_total_tokens - (
        sys_prompt_tokens + query_tokens + buffer_tokens
    )
    logger.debug(
-        f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
    )
    # Process chunks using unified processing with dynamic token limit
@ -4247,29 +4190,19 @@ async def naive_query(
        if ref["reference_id"]
    )
-    context_content = f"""
+    naive_context_template = PROMPTS["naive_query_context"]
---Document Chunks(DC)---
+    context_content = naive_context_template.format(
-
+        text_chunks_str=text_units_str,
-```json
+        reference_list_str=reference_list_str,
-{text_units_str}
+    )
 ```
 -----Refrence Document List-----
 {reference_list_str}
 """
    if query_param.only_need_context and not query_param.only_need_prompt:
        return QueryResult(content=context_content, raw_data=raw_data)
-    sys_prompt_temp = system_prompt if system_prompt else PROMPTS["naive_rag_response"]
+    sys_prompt = sys_prompt_template.format(
    sys_prompt = sys_prompt_temp.format(
        user_prompt=f"```\n{query_param.user_prompt}\n```"
        if query_param.user_prompt
        else "n/a",
        content_data=text_units_str,
        response_type=query_param.response_type,
        user_prompt=user_prompt,
        content_data=context_content,
    )
    user_query = query
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@ -212,15 +212,18 @@ PROMPTS["fail_response"] = (
 )
 PROMPTS["rag_response"] = """---Role---
 You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.
 ---Goal---
 Generate a comprehensive, well-structured answer to the user query.
 The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the `Source Data`.
 Consider the conversation history if provided to maintain conversational flow and avoid repeating information.
 ---Instructions---
-1. **Step-by-Step Instruction:**
+
 **1. Step-by-Step Instruction:**
  - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
  - Scrutinize the `Source Data`(both Knowledge Graph and Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
  - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@ -228,24 +231,24 @@ Consider the conversation history if provided to maintain conversational flow an
  - Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
  - Do not generate anything after the reference section.
-2. **Content & Grounding:**
+**2. Content & Grounding:**
  - Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
  - If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.
-3. **Formatting & Language:**
+**3. Formatting & Language:**
  - The response MUST be in the same language as the user query.
  - Use Markdown for clear formatting (e.g., headings, bold, lists).
  - The response should be presented in {response_type}.
-4. **References Section Format:**
+**4. References Section Format:**
  - The References section should be under heading: `### References`
-  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
+  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
  - The Document Title in the citation must retain its original language.
  - Output each citation on an individual line
  - Provide maximum of 5 most relevant citations.
  - Do not generate footnotes section or any text after the references.
-5. **Reference Section Example:**
+**5. Reference Section Example:**
 ```
 ### References
 * [1] Document Title One
@ -253,26 +256,26 @@ Consider the conversation history if provided to maintain conversational flow an
 * [3] Document Title Three
 ```
-6. **Additional Instructions**: {user_prompt}
+**6. Additional Instructions**: {user_prompt}
 ---Source Data---
 Knowledge Graph and Document Chunks:
 {context_data}
 """
 PROMPTS["naive_rag_response"] = """---Role---
 You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.
 ---Goal---
 Generate a comprehensive, well-structured answer to the user query.
 The answer must integrate relevant facts from the Document Chunks found in the `Source Data`.
 Consider the conversation history if provided to maintain conversational flow and avoid repeating information.
 ---Instructions---
-1. **Think Step-by-Step:**
+
 **1. Think Step-by-Step:**
  - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
  - Scrutinize the `Source Data`(Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
  - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@ -280,24 +283,24 @@ Consider the conversation history if provided to maintain conversational flow an
  - Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
  - Do not generate anything after the reference section.
-2. **Content & Grounding:**
+**2. Content & Grounding:**
  - Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
  - If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.
-3. **Formatting & Language:**
+**3. Formatting & Language:**
  - The response MUST be in the same language as the user query.
  - Use Markdown for clear formatting (e.g., headings, bold, lists).
  - The response should be presented in {response_type}.
-4. **References Section Format:**
+**4. References Section Format:**
  - The References section should be under heading: `### References`
-  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
+  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
  - The Document Title in the citation must retain its original language.
  - Output each citation on an individual line
  - Provide maximum of 5 most relevant citations.
  - Do not generate footnotes section or any text after the references.
-5. **Reference Section Example:**
+**5. Reference Section Example:**
 ```
 ### References
 * [1] Document Title One
@ -305,16 +308,55 @@ Consider the conversation history if provided to maintain conversational flow an
 * [3] Document Title Three
 ```
-6. **Additional Instructions**: {user_prompt}
+**6. Additional Instructions**: {user_prompt}
 ---Source Data---
 Document Chunks:
 {content_data}
 """
 PROMPTS["kg_query_context"] = """
 Entities Data From Knowledge Graph(KG):
 ```json
 {entities_str}
 ```
 Relationships Data From Knowledge Graph(KG):
 ```json
 {relations_str}
 ```
 Original Texts From Document Chunks(DC):
 ```json
 {text_chunks_str}
 ```
 Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
 {reference_list_str}
 """
 PROMPTS["naive_query_context"] = """
 Original Texts From Document Chunks(DC):
 ```json
 {text_chunks_str}
 ```
 Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
 {reference_list_str}
 """
 PROMPTS["keywords_extraction"] = """---Role---
 You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval.