Refactor prompts and context building for better maintainability

- Extract context templates to PROMPTS
- Unify token calculation logic
- Simplify user_prompt formatting
- Reduce code duplication
- Improve prompt structure consistency
yangdx 2025-09-26 12:39:06 +08:00
parent 1a0dc94f55
commit cbdc4c4bdf
2 changed files with 165 additions and 190 deletions
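The "unify token calculation logic" item reduces to a single budget formula: everything left over after the fixed prompt parts goes to document chunks. A minimal sketch of that arithmetic, assuming the tokenizer, the preliminary prompts, and the query are supplied by the surrounding pipeline (names taken from the diff below):

```python
def chunk_token_budget(
    tokenizer,
    query: str,
    pre_sys_prompt: str,
    pre_kg_context: str,
    max_total_tokens: int,
    buffer_tokens: int = 200,  # reserved for reference list and safety buffer
) -> int:
    """Sketch only: mirrors the allocation used in kg_query / _build_llm_context / naive_query."""
    sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))   # system prompt rendered with empty context_data
    kg_context_tokens = len(tokenizer.encode(pre_kg_context))   # KG context rendered with empty chunk/reference slots
    query_tokens = len(tokenizer.encode(query))
    return max_total_tokens - (
        sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
    )
```

The same helper-shaped calculation appears in both the graph and naive query paths in the hunks below; only the KG-context term drops out in the naive case.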


@@ -2389,14 +2389,19 @@ async def kg_query(
content=context_result.context, raw_data=context_result.raw_data
)
user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
# Build system prompt
sys_prompt_temp = system_prompt if system_prompt else PROMPTS["rag_response"]
sys_prompt = sys_prompt_temp.format(
user_prompt=f"```\n{query_param.user_prompt}\n```"
if query_param.user_prompt
else "n/a",
response_type=response_type,
user_prompt=user_prompt,
context_data=context_result.context,
response_type=query_param.response_type,
)
user_query = query
@@ -3152,108 +3157,78 @@ async def _build_llm_context(
global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
)
# Get the system prompt template from PROMPTS or global_config
sys_prompt_template = global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)
kg_context_template = PROMPTS["kg_query_context"]
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)
# Calculate preliminary kg context tokens
pre_kg_context = kg_context_template.format(
entities_str=entities_str,
relations_str=relations_str,
text_chunks_str="",
reference_list_str="",
)
kg_context_tokens = len(tokenizer.encode(pre_kg_context))
# Calculate preliminary system prompt tokens
pre_sys_prompt = sys_prompt_template.format(
context_data="", # Empty for overhead calculation
response_type=response_type,
user_prompt=user_prompt,
)
sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
# Calculate available tokens for text chunks
query_tokens = len(tokenizer.encode(query))
buffer_tokens = 200 # reserved for reference list and safety buffer
available_chunk_tokens = max_total_tokens - (
sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
)
logger.debug(
f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Apply token truncation to chunks using the dynamic limit
truncated_chunks = await process_chunks_unified(
query=query,
unique_chunks=merged_chunks,
query_param=query_param,
global_config=global_config,
source_type=query_param.mode,
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)
# Generate reference list from truncated chunks using the new common function
reference_list, truncated_chunks = generate_reference_list_from_chunks(
truncated_chunks
)
# Rebuild text_units_context with truncated chunks
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
text_units_context = []
truncated_chunks = []
if merged_chunks:
# Calculate dynamic token limit for text chunks
entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)
# Calculate base context tokens (entities + relations + template)
kg_context_template = """-----Entities(KG)-----
```json
{entities_str}
```
-----Relationships(KG)-----
```json
{relations_str}
```
-----Document Chunks(DC)-----
```json
```
-----Refrence Document List-----
The reference documents list in Document Chunks(DC) is as follows (reference_id in square brackets):
"""
kg_context = kg_context_template.format(
entities_str=entities_str, relations_str=relations_str
)
kg_context_tokens = len(tokenizer.encode(kg_context))
# Calculate system prompt template overhead
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)
# Get the system prompt template from PROMPTS or global_config
sys_prompt_template = global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)
# Create sample system prompt for overhead calculation
sample_sys_prompt = sys_prompt_template.format(
context_data="", # Empty for overhead calculation
response_type=response_type,
user_prompt=user_prompt,
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
buffer_tokens = 100 # Safety buffer as requested
# Calculate available tokens for text chunks
used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens
logger.debug(
f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Apply token truncation to chunks using the dynamic limit
truncated_chunks = await process_chunks_unified(
query=query,
unique_chunks=merged_chunks,
query_param=query_param,
global_config=global_config,
source_type=query_param.mode,
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)
# Generate reference list from truncated chunks using the new common function
reference_list, truncated_chunks = generate_reference_list_from_chunks(
truncated_chunks
)
# Rebuild text_units_context with truncated chunks
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"reference_id": chunk["reference_id"],
"content": chunk["content"],
}
)
logger.debug(
f"Final chunk processing: {len(merged_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"reference_id": chunk["reference_id"],
"content": chunk["content"],
}
)
logger.info(
@@ -3292,12 +3267,6 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
if chunk_tracking_log:
logger.info(f"chunks S+F/O: {' '.join(chunk_tracking_log)}")
entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)
text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
)
@@ -3307,31 +3276,12 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
if ref["reference_id"]
)
result = f"""-----Entities(KG)-----
```json
{entities_str}
```
-----Relationships(KG)-----
```json
{relations_str}
```
-----Document Chunks(DC)-----
```json
{text_units_str}
```
-----Refrence Document List-----
Document Chunks (DC) reference documents : (Each entry begins with [reference_id])
{reference_list_str}
"""
result = kg_context_template.format(
entities_str=entities_str,
relations_str=relations_str,
text_chunks_str=text_units_str,
reference_list_str=reference_list_str,
)
# Always return both context and complete data structure (unified approach)
logger.debug(
@@ -3416,11 +3366,7 @@ async def _build_query_context(
query_embedding=search_result["query_embedding"],
)
if (
not merged_chunks
and not truncation_result["entities_context"]
and not truncation_result["relations_context"]
):
if not merged_chunks:
return None
# Stage 4: Build final LLM context with dynamic token processing
@@ -4156,7 +4102,7 @@ async def naive_query(
)
# Calculate system prompt template tokens (excluding content_data)
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
response_type = (
query_param.response_type
if query_param.response_type
@@ -4168,26 +4114,23 @@ async def naive_query(
system_prompt if system_prompt else PROMPTS["naive_rag_response"]
)
# Create a sample system prompt with empty content_data to calculate overhead
sample_sys_prompt = sys_prompt_template.format(
content_data="", # Empty for overhead calculation
# Create a preliminary system prompt with empty content_data to calculate overhead
pre_sys_prompt = sys_prompt_template.format(
response_type=response_type,
user_prompt=user_prompt,
content_data="", # Empty for overhead calculation
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
buffer_tokens = 100 # Safety buffer
# Calculate available tokens for chunks
used_tokens = sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens
sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
query_tokens = len(tokenizer.encode(query))
buffer_tokens = 200 # reserved for reference list and safety buffer
available_chunk_tokens = max_total_tokens - (
sys_prompt_tokens + query_tokens + buffer_tokens
)
logger.debug(
f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)
# Process chunks using unified processing with dynamic token limit
@@ -4247,29 +4190,19 @@ async def naive_query(
if ref["reference_id"]
)
context_content = f"""
---Document Chunks(DC)---
```json
{text_units_str}
```
-----Refrence Document List-----
{reference_list_str}
"""
naive_context_template = PROMPTS["naive_query_context"]
context_content = naive_context_template.format(
text_chunks_str=text_units_str,
reference_list_str=reference_list_str,
)
if query_param.only_need_context and not query_param.only_need_prompt:
return QueryResult(content=context_content, raw_data=raw_data)
sys_prompt_temp = system_prompt if system_prompt else PROMPTS["naive_rag_response"]
sys_prompt = sys_prompt_temp.format(
user_prompt=f"```\n{query_param.user_prompt}\n```"
if query_param.user_prompt
else "n/a",
content_data=text_units_str,
sys_prompt = sys_prompt_template.format(
response_type=query_param.response_type,
user_prompt=user_prompt,
content_data=context_content,
)
user_query = query

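For orientation before the prompt.py diff, a hedged sketch of how the extracted templates are now filled instead of inline f-strings (field names from the diff; the `*_str` values are assumed to be built earlier in each function, and the import path is assumed):

```python
from lightrag.prompt import PROMPTS  # import path assumed


def build_kg_context(
    entities_str: str,
    relations_str: str,
    text_units_str: str,
    reference_list_str: str,
) -> str:
    # Fill the centralized template rather than a hard-coded f-string.
    return PROMPTS["kg_query_context"].format(
        entities_str=entities_str,
        relations_str=relations_str,
        text_chunks_str=text_units_str,
        reference_list_str=reference_list_str,
    )


def build_naive_context(text_units_str: str, reference_list_str: str) -> str:
    return PROMPTS["naive_query_context"].format(
        text_chunks_str=text_units_str,
        reference_list_str=reference_list_str,
    )
```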

@@ -212,15 +212,18 @@ PROMPTS["fail_response"] = (
)
PROMPTS["rag_response"] = """---Role---
You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.
---Goal---
Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the `Source Data`.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.
---Instructions---
1. **Step-by-Step Instruction:**
**1. Step-by-Step Instruction:**
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize the `Source Data`(both Knowledge Graph and Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -228,24 +231,24 @@ Consider the conversation history if provided to maintain conversational flow an
- Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
- Do not generate anything after the reference section.
2. **Content & Grounding:**
**2. Content & Grounding:**
- Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.
3. **Formatting & Language:**
**3. Formatting & Language:**
- The response MUST be in the same language as the user query.
- Use Markdown for clear formatting (e.g., headings, bold, lists).
- The response should be presented in {response_type}.
4. **References Section Format:**
**4. References Section Format:**
- The References section should be under heading: `### References`
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
- The Document Title in the citation must retain its original language.
- Output each citation on an individual line
- Provide maximum of 5 most relevant citations.
- Do not generate footnotes section or any text after the references.
5. **Reference Section Example:**
**5. Reference Section Example:**
```
### References
* [1] Document Title One
@@ -253,26 +256,26 @@ Consider the conversation history if provided to maintain conversational flow an
* [3] Document Title Three
```
6. **Additional Instructions**: {user_prompt}
**6. Additional Instructions**: {user_prompt}
---Source Data---
Knowledge Graph and Document Chunks:
{context_data}
"""
PROMPTS["naive_rag_response"] = """---Role---
You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.
---Goal---
Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Document Chunks found in the `Source Data`.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.
---Instructions---
1. **Think Step-by-Step:**
**1. Think Step-by-Step:**
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize the `Source Data`(Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -280,24 +283,24 @@ Consider the conversation history if provided to maintain conversational flow an
- Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
- Do not generate anything after the reference section.
2. **Content & Grounding:**
**2. Content & Grounding:**
- Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.
3. **Formatting & Language:**
**3. Formatting & Language:**
- The response MUST be in the same language as the user query.
- Use Markdown for clear formatting (e.g., headings, bold, lists).
- The response should be presented in {response_type}.
4. **References Section Format:**
**4. References Section Format:**
- The References section should be under heading: `### References`
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
- The Document Title in the citation must retain its original language.
- Output each citation on an individual line
- Provide maximum of 5 most relevant citations.
- Do not generate footnotes section or any text after the references.
5. **Reference Section Example:**
**5. Reference Section Example:**
```
### References
* [1] Document Title One
@@ -305,16 +308,55 @@ Consider the conversation history if provided to maintain conversational flow an
* [3] Document Title Three
```
6. **Additional Instructions**: {user_prompt}
**6. Additional Instructions**: {user_prompt}
---Source Data---
Document Chunks:
{content_data}
"""
PROMPTS["kg_query_context"] = """
Entities Data From Knowledge Graph(KG):
```json
{entities_str}
```
Relationships Data From Knowledge Graph(KG):
```json
{relations_str}
```
Original Texts From Document Chunks(DC):
```json
{text_chunks_str}
```
Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
{reference_list_str}
"""
PROMPTS["naive_query_context"] = """
Original Texts From Document Chunks(DC):
```json
{text_chunks_str}
```
Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
{reference_list_str}
"""
PROMPTS["keywords_extraction"] = """---Role---
You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval.