Refactor prompts and context building for better maintainability

- Extract context templates to PROMPTS
- Unify token calculation logic
- Simplify user_prompt formatting
- Reduce code duplication
- Improve prompt structure consistency
yangdx 2025-09-26 12:39:06 +08:00
parent 1a0dc94f55
commit cbdc4c4bdf
2 changed files with 165 additions and 190 deletions
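The "unify token calculation logic" item reduces to a single budget formula: everything left over after the fixed prompt parts goes to document chunks. A minimal sketch of that arithmetic, assuming the tokenizer, the preliminary prompts, and the query are supplied by the surrounding pipeline (names taken from the diff below):

```python
def chunk_token_budget(
    tokenizer,
    query: str,
    pre_sys_prompt: str,
    pre_kg_context: str,
    max_total_tokens: int,
    buffer_tokens: int = 200,  # reserved for reference list and safety buffer
) -> int:
    """Sketch only: mirrors the allocation used in kg_query / _build_llm_context / naive_query."""
    sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))   # system prompt rendered with empty context_data
    kg_context_tokens = len(tokenizer.encode(pre_kg_context))   # KG context rendered with empty chunk/reference slots
    query_tokens = len(tokenizer.encode(query))
    return max_total_tokens - (
        sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
    )
```

The same helper-shaped calculation appears in both the graph and naive query paths in the hunks below; only the KG-context term drops out in the naive case.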


@@ -2389,14 +2389,19 @@ async def kg_query(
content=context_result.context, raw_data=context_result.raw_data
)
user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
# Build system prompt
sys_prompt_temp = system_prompt if system_prompt else PROMPTS["rag_response"]
sys_prompt = sys_prompt_temp.format(
user_prompt=f"```\n{query_param.user_prompt}\n```"
if query_param.user_prompt
else "n/a",
response_type=response_type,
user_prompt=user_prompt,
context_data=context_result.context,
response_type=query_param.response_type,
)
user_query = query
@@ -3152,108 +3157,78 @@ async def _build_llm_context(
global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
)
# Get the system prompt template from PROMPTS or global_config
sys_prompt_template = global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)
kg_context_template = PROMPTS["kg_query_context"]
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)
# Calculate preliminary kg context tokens
pre_kg_context = kg_context_template.format(
entities_str=entities_str,
relations_str=relations_str,
text_chunks_str="",
reference_list_str="",
)
kg_context_tokens = len(tokenizer.encode(pre_kg_context))
# Calculate preliminary system prompt tokens
pre_sys_prompt = sys_prompt_template.format(
context_data="", # Empty for overhead calculation
response_type=response_type,
user_prompt=user_prompt,
)
sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
# Calculate available tokens for text chunks
query_tokens = len(tokenizer.encode(query))
buffer_tokens = 200 # reserved for reference list and safety buffer
available_chunk_tokens = max_total_tokens - (
sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
)
logger.debug(
f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Apply token truncation to chunks using the dynamic limit
truncated_chunks = await process_chunks_unified(
query=query,
unique_chunks=merged_chunks,
query_param=query_param,
global_config=global_config,
source_type=query_param.mode,
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)
# Generate reference list from truncated chunks using the new common function
reference_list, truncated_chunks = generate_reference_list_from_chunks(
truncated_chunks
)
# Rebuild text_units_context with truncated chunks
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
text_units_context = []
truncated_chunks = []
if merged_chunks:
# Calculate dynamic token limit for text chunks
entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)
# Calculate base context tokens (entities + relations + template)
kg_context_template = """-----Entities(KG)-----
```json
{entities_str}
```
-----Relationships(KG)-----
```json
{relations_str}
```
-----Document Chunks(DC)-----
```json
```
-----Refrence Document List-----
The reference documents list in Document Chunks(DC) is as follows (reference_id in square brackets):
"""
kg_context = kg_context_template.format(
entities_str=entities_str, relations_str=relations_str
)
kg_context_tokens = len(tokenizer.encode(kg_context))
# Calculate system prompt template overhead
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
# Get the system prompt template from PROMPTS or global_config
sys_prompt_template = global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)
# Create sample system prompt for overhead calculation
sample_sys_prompt = sys_prompt_template.format(
context_data="", # Empty for overhead calculation
response_type=response_type,
user_prompt=user_prompt,
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
buffer_tokens = 100 # Safety buffer as requested
# Calculate available tokens for text chunks
used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens
logger.debug(
f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Apply token truncation to chunks using the dynamic limit
truncated_chunks = await process_chunks_unified(
query=query,
unique_chunks=merged_chunks,
query_param=query_param,
global_config=global_config,
source_type=query_param.mode,
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)
# Generate reference list from truncated chunks using the new common function
reference_list, truncated_chunks = generate_reference_list_from_chunks(
truncated_chunks
)
# Rebuild text_units_context with truncated chunks
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"reference_id": chunk["reference_id"],
"content": chunk["content"],
}
)
logger.debug(
f"Final chunk processing: {len(merged_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"reference_id": chunk["reference_id"],
"content": chunk["content"],
}
)
logger.info(
@@ -3292,12 +3267,6 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
if chunk_tracking_log:
logger.info(f"chunks S+F/O: {' '.join(chunk_tracking_log)}")
entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)
text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
)
@@ -3307,31 +3276,12 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
if ref["reference_id"]
)
result = f"""-----Entities(KG)-----
```json
{entities_str}
```
-----Relationships(KG)-----
```json
{relations_str}
```
-----Document Chunks(DC)-----
```json
{text_units_str}
```
-----Refrence Document List-----
Document Chunks (DC) reference documents : (Each entry begins with [reference_id])
{reference_list_str}
"""
result = kg_context_template.format(
entities_str=entities_str,
relations_str=relations_str,
text_chunks_str=text_units_str,
reference_list_str=reference_list_str,
)
# Always return both context and complete data structure (unified approach)
logger.debug(
@@ -3416,11 +3366,7 @@ async def _build_query_context(
query_embedding=search_result["query_embedding"],
)
if (
not merged_chunks
and not truncation_result["entities_context"]
and not truncation_result["relations_context"]
):
if not merged_chunks:
return None
# Stage 4: Build final LLM context with dynamic token processing
@@ -4156,7 +4102,7 @@ async def naive_query(
)
# Calculate system prompt template tokens (excluding content_data)
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
response_type = (
query_param.response_type
if query_param.response_type
@@ -4168,26 +4114,23 @@ async def naive_query(
system_prompt if system_prompt else PROMPTS["naive_rag_response"]
)
# Create a sample system prompt with empty content_data to calculate overhead
sample_sys_prompt = sys_prompt_template.format(
content_data="", # Empty for overhead calculation
# Create a preliminary system prompt with empty content_data to calculate overhead
pre_sys_prompt = sys_prompt_template.format(
response_type=response_type,
user_prompt=user_prompt,
content_data="", # Empty for overhead calculation
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
buffer_tokens = 100 # Safety buffer
# Calculate available tokens for chunks
used_tokens = sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens
sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
query_tokens = len(tokenizer.encode(query))
buffer_tokens = 200 # reserved for reference list and safety buffer
available_chunk_tokens = max_total_tokens - (
sys_prompt_tokens + query_tokens + buffer_tokens
)
logger.debug(
f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Process chunks using unified processing with dynamic token limit
@@ -4247,29 +4190,19 @@ async def naive_query(
if ref["reference_id"]
)
context_content = f"""
---Document Chunks(DC)---
```json
{text_units_str}
```
-----Refrence Document List-----
{reference_list_str}
"""
naive_context_template = PROMPTS["naive_query_context"]
context_content = naive_context_template.format(
text_chunks_str=text_units_str,
reference_list_str=reference_list_str,
)
if query_param.only_need_context and not query_param.only_need_prompt:
return QueryResult(content=context_content, raw_data=raw_data)
sys_prompt_temp = system_prompt if system_prompt else PROMPTS["naive_rag_response"]
sys_prompt = sys_prompt_temp.format(
user_prompt=f"```\n{query_param.user_prompt}\n```"
if query_param.user_prompt
else "n/a",
content_data=text_units_str,
sys_prompt = sys_prompt_template.format(
response_type=query_param.response_type,
user_prompt=user_prompt,
content_data=context_content,
)
user_query = query

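For orientation before the prompt.py diff, a hedged sketch of how the extracted templates are now filled instead of inline f-strings (field names from the diff; the `*_str` values are assumed to be built earlier in each function, and the import path is assumed):

```python
from lightrag.prompt import PROMPTS  # import path assumed


def build_kg_context(
    entities_str: str,
    relations_str: str,
    text_units_str: str,
    reference_list_str: str,
) -> str:
    # Fill the centralized template rather than a hard-coded f-string.
    return PROMPTS["kg_query_context"].format(
        entities_str=entities_str,
        relations_str=relations_str,
        text_chunks_str=text_units_str,
        reference_list_str=reference_list_str,
    )


def build_naive_context(text_units_str: str, reference_list_str: str) -> str:
    return PROMPTS["naive_query_context"].format(
        text_chunks_str=text_units_str,
        reference_list_str=reference_list_str,
    )
```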

@@ -212,15 +212,18 @@ PROMPTS["fail_response"] = (
)
PROMPTS["rag_response"] = """---Role---
You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.
---Goal---
Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the `Source Data`.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.
---Instructions---
1. **Step-by-Step Instruction:**
**1. Step-by-Step Instruction:**
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize the `Source Data`(both Knowledge Graph and Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -228,24 +231,24 @@ Consider the conversation history if provided to maintain conversational flow an
- Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
- Do not generate anything after the reference section.
2. **Content & Grounding:**
**2. Content & Grounding:**
- Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.
3. **Formatting & Language:**
**3. Formatting & Language:**
- The response MUST be in the same language as the user query.
- Use Markdown for clear formatting (e.g., headings, bold, lists).
- The response should be presented in {response_type}.
4. **References Section Format:**
**4. References Section Format:**
- The References section should be under heading: `### References`
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
- The Document Title in the citation must retain its original language.
- Output each citation on an individual line
- Provide maximum of 5 most relevant citations.
- Do not generate footnotes section or any text after the references.
5. **Reference Section Example:**
**5. Reference Section Example:**
```
### References
* [1] Document Title One
@@ -253,26 +256,26 @@ Consider the conversation history if provided to maintain conversational flow an
* [3] Document Title Three
```
6. **Additional Instructions**: {user_prompt}
**6. Additional Instructions**: {user_prompt}
---Source Data---
Knowledge Graph and Document Chunks:
{context_data}
"""
PROMPTS["naive_rag_response"] = """---Role---
You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.
---Goal---
Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Document Chunks found in the `Source Data`.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.
---Instructions---
1. **Think Step-by-Step:**
**1. Think Step-by-Step:**
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize the `Source Data`(Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -280,24 +283,24 @@ Consider the conversation history if provided to maintain conversational flow an
- Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
- Do not generate anything after the reference section.
2. **Content & Grounding:**
**2. Content & Grounding:**
- Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.
3. **Formatting & Language:**
**3. Formatting & Language:**
- The response MUST be in the same language as the user query.
- Use Markdown for clear formatting (e.g., headings, bold, lists).
- The response should be presented in {response_type}.
4. **References Section Format:**
**4. References Section Format:**
- The References section should be under heading: `### References`
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
- The Document Title in the citation must retain its original language.
- Output each citation on an individual line
- Provide maximum of 5 most relevant citations.
- Do not generate footnotes section or any text after the references.
5. **Reference Section Example:**
**5. Reference Section Example:**
```
### References
* [1] Document Title One
@@ -305,16 +308,55 @@ Consider the conversation history if provided to maintain conversational flow an
* [3] Document Title Three
```
6. **Additional Instructions**: {user_prompt}
**6. Additional Instructions**: {user_prompt}
---Source Data---
Document Chunks:
{content_data}
"""
PROMPTS["kg_query_context"] = """
Entities Data From Knowledge Graph(KG):
```json
{entities_str}
```
Relationships Data From Knowledge Graph(KG):
```json
{relations_str}
```
Original Texts From Document Chunks(DC):
```json
{text_chunks_str}
```
Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
{reference_list_str}
"""
PROMPTS["naive_query_context"] = """
Original Texts From Document Chunks(DC):
```json
{text_chunks_str}
```
Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
{reference_list_str}
"""
PROMPTS["keywords_extraction"] = """---Role---
You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval.