Refactor prompts and context building for better maintainability
- Extract context templates to PROMPTS
- Unify token calculation logic
- Simplify user_prompt formatting
- Reduce code duplication
- Improve prompt structure consistency
parent 1a0dc94f55
commit cbdc4c4bdf
2 changed files with 165 additions and 190 deletions
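The core pattern behind "Extract context templates to PROMPTS" and "Unify token calculation logic" is sketched below with made-up template text and values (the real templates this commit adds are kg_query_context and naive_query_context, shown in the second file's hunks):

PROMPTS = {}
PROMPTS["example_context"] = """Entities:
{entities_str}

Document Chunks:
{text_chunks_str}
"""

# The same template is formatted twice: once with empty chunk fields to measure the fixed
# token overhead of the layout, and once with the real data to build the final context.
pre_context = PROMPTS["example_context"].format(entities_str="", text_chunks_str="")
final_context = PROMPTS["example_context"].format(
    entities_str='{"entity": "Alan Turing"}',
    text_chunks_str='{"reference_id": 1, "content": "..."}',
)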
@@ -2389,14 +2389,19 @@ async def kg_query(
content=context_result.context, raw_data=context_result.raw_data
)

user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)

# Build system prompt
sys_prompt_temp = system_prompt if system_prompt else PROMPTS["rag_response"]
sys_prompt = sys_prompt_temp.format(
user_prompt=f"```\n{query_param.user_prompt}\n```"
if query_param.user_prompt
else "n/a",
response_type=response_type,
user_prompt=user_prompt,
context_data=context_result.context,
response_type=query_param.response_type,
)

user_query = query
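The hunk above interleaves the removed and the added lines without +/- markers, so the post-change call is easier to read on its own. A sketch reconstructed from the added lines (a reading aid, not the verbatim committed source; PROMPTS is the project's prompt table defined in the second changed file):

def build_kg_sys_prompt(query_param, context_result, system_prompt=None):
    # Reconstructed sketch of kg_query's new prompt assembly.
    user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
    response_type = (
        query_param.response_type if query_param.response_type else "Multiple Paragraphs"
    )
    template = system_prompt if system_prompt else PROMPTS["rag_response"]
    return template.format(
        response_type=response_type,
        user_prompt=user_prompt,
        context_data=context_result.context,
    )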
@@ -3152,108 +3157,78 @@ async def _build_llm_context(
global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
)

# Get the system prompt template from PROMPTS or global_config
sys_prompt_template = global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)

kg_context_template = PROMPTS["kg_query_context"]
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)

entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)

# Calculate preliminary kg context tokens
pre_kg_context = kg_context_template.format(
entities_str=entities_str,
relations_str=relations_str,
text_chunks_str="",
reference_list_str="",
)
kg_context_tokens = len(tokenizer.encode(pre_kg_context))

# Calculate preliminary system prompt tokens
pre_sys_prompt = sys_prompt_template.format(
context_data="", # Empty for overhead calculation
response_type=response_type,
user_prompt=user_prompt,
)
sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))

# Calculate available tokens for text chunks
query_tokens = len(tokenizer.encode(query))
buffer_tokens = 200 # reserved for reference list and safety buffer
available_chunk_tokens = max_total_tokens - (
sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
)

logger.debug(
f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)

# Apply token truncation to chunks using the dynamic limit
truncated_chunks = await process_chunks_unified(
query=query,
unique_chunks=merged_chunks,
query_param=query_param,
global_config=global_config,
source_type=query_param.mode,
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)

# Generate reference list from truncated chunks using the new common function
reference_list, truncated_chunks = generate_reference_list_from_chunks(
truncated_chunks
)

# Rebuild text_units_context with truncated chunks
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
text_units_context = []
truncated_chunks = []

if merged_chunks:
# Calculate dynamic token limit for text chunks
entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)

# Calculate base context tokens (entities + relations + template)
kg_context_template = """-----Entities(KG)-----

```json
{entities_str}
```

-----Relationships(KG)-----

```json
{relations_str}
```

-----Document Chunks(DC)-----

```json
```

-----Refrence Document List-----

The reference documents list in Document Chunks(DC) is as follows (reference_id in square brackets):

"""
kg_context = kg_context_template.format(
entities_str=entities_str, relations_str=relations_str
)
kg_context_tokens = len(tokenizer.encode(kg_context))

# Calculate system prompt template overhead
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
if query_param.response_type
else "Multiple Paragraphs"
)

# Get the system prompt template from PROMPTS or global_config
sys_prompt_template = global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)

# Create sample system prompt for overhead calculation
sample_sys_prompt = sys_prompt_template.format(
context_data="", # Empty for overhead calculation
response_type=response_type,
user_prompt=user_prompt,
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))

# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens

buffer_tokens = 100 # Safety buffer as requested

# Calculate available tokens for text chunks
used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens

logger.debug(
f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)

# Apply token truncation to chunks using the dynamic limit
truncated_chunks = await process_chunks_unified(
query=query,
unique_chunks=merged_chunks,
query_param=query_param,
global_config=global_config,
source_type=query_param.mode,
chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
)

# Generate reference list from truncated chunks using the new common function
reference_list, truncated_chunks = generate_reference_list_from_chunks(
truncated_chunks
)

# Rebuild text_units_context with truncated chunks
# The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"reference_id": chunk["reference_id"],
"content": chunk["content"],
}
)

logger.debug(
f"Final chunk processing: {len(merged_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
for i, chunk in enumerate(truncated_chunks):
text_units_context.append(
{
"reference_id": chunk["reference_id"],
"content": chunk["content"],
}
)

logger.info(
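In the new code path the chunk budget is a single subtraction over measured token counts: the system prompt formatted with empty context_data, the KG portion of kg_query_context, the query, and a fixed buffer. A sketch with purely illustrative numbers (the real values come from tokenizer.encode() at query time):

# Illustrative numbers only, not taken from the code.
max_total_tokens = 32000   # global_config["max_total_tokens"]
sys_prompt_tokens = 1200   # rag_response template formatted with empty context_data
kg_context_tokens = 3500   # entities + relations rendered into kg_query_context
query_tokens = 40
buffer_tokens = 200        # reserved for reference list and safety buffer

available_chunk_tokens = max_total_tokens - (
    sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
)
assert available_chunk_tokens == 27060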
@@ -3292,12 +3267,6 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
if chunk_tracking_log:
logger.info(f"chunks S+F/O: {' '.join(chunk_tracking_log)}")

entities_str = "\n".join(
json.dumps(entity, ensure_ascii=False) for entity in entities_context
)
relations_str = "\n".join(
json.dumps(relation, ensure_ascii=False) for relation in relations_context
)
text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
)
@@ -3307,31 +3276,12 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
if ref["reference_id"]
)

result = f"""-----Entities(KG)-----

```json
{entities_str}
```

-----Relationships(KG)-----

```json
{relations_str}
```

-----Document Chunks(DC)-----

```json
{text_units_str}
```

-----Refrence Document List-----

Document Chunks (DC) reference documents : (Each entry begins with [reference_id])

{reference_list_str}

"""
result = kg_context_template.format(
entities_str=entities_str,
relations_str=relations_str,
text_chunks_str=text_units_str,
reference_list_str=reference_list_str,
)

# Always return both context and complete data structure (unified approach)
logger.debug(
@@ -3416,11 +3366,7 @@ async def _build_query_context(
query_embedding=search_result["query_embedding"],
)

if (
not merged_chunks
and not truncation_result["entities_context"]
and not truncation_result["relations_context"]
):
if not merged_chunks:
return None

# Stage 4: Build final LLM context with dynamic token processing
@@ -4156,7 +4102,7 @@ async def naive_query(
)

# Calculate system prompt template tokens (excluding content_data)
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
response_type = (
query_param.response_type
if query_param.response_type
@@ -4168,26 +4114,23 @@ async def naive_query(
system_prompt if system_prompt else PROMPTS["naive_rag_response"]
)

# Create a sample system prompt with empty content_data to calculate overhead
sample_sys_prompt = sys_prompt_template.format(
content_data="", # Empty for overhead calculation
# Create a preliminary system prompt with empty content_data to calculate overhead
pre_sys_prompt = sys_prompt_template.format(
response_type=response_type,
user_prompt=user_prompt,
content_data="", # Empty for overhead calculation
)
sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))

# Total system prompt overhead = template + query tokens
query_tokens = len(tokenizer.encode(query))
sys_prompt_overhead = sys_prompt_template_tokens + query_tokens

buffer_tokens = 100 # Safety buffer

# Calculate available tokens for chunks
used_tokens = sys_prompt_overhead + buffer_tokens
available_chunk_tokens = max_total_tokens - used_tokens
sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
query_tokens = len(tokenizer.encode(query))
buffer_tokens = 200 # reserved for reference list and safety buffer
available_chunk_tokens = max_total_tokens - (
sys_prompt_tokens + query_tokens + buffer_tokens
)

logger.debug(
f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
)

# Process chunks using unified processing with dynamic token limit
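The naive path applies the same budget shape without the KG term; with the same illustrative numbers as above:

# Illustrative only: max_total_tokens - (sys_prompt_tokens + query_tokens + buffer_tokens)
available_chunk_tokens = 32000 - (1200 + 40 + 200)  # = 30560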
@@ -4247,29 +4190,19 @@ async def naive_query(
if ref["reference_id"]
)

context_content = f"""
---Document Chunks(DC)---

```json
{text_units_str}
```

-----Refrence Document List-----

{reference_list_str}

"""
naive_context_template = PROMPTS["naive_query_context"]
context_content = naive_context_template.format(
text_chunks_str=text_units_str,
reference_list_str=reference_list_str,
)

if query_param.only_need_context and not query_param.only_need_prompt:
return QueryResult(content=context_content, raw_data=raw_data)

sys_prompt_temp = system_prompt if system_prompt else PROMPTS["naive_rag_response"]
sys_prompt = sys_prompt_temp.format(
user_prompt=f"```\n{query_param.user_prompt}\n```"
if query_param.user_prompt
else "n/a",
content_data=text_units_str,
sys_prompt = sys_prompt_template.format(
response_type=query_param.response_type,
user_prompt=user_prompt,
content_data=context_content,
)

user_query = query
@@ -212,15 +212,18 @@ PROMPTS["fail_response"] = (
)

PROMPTS["rag_response"] = """---Role---

You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.

---Goal---

Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the `Source Data`.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.

---Instructions---
1. **Step-by-Step Instruction:**

**1. Step-by-Step Instruction:**
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize the `Source Data`(both Knowledge Graph and Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -228,24 +231,24 @@ Consider the conversation history if provided to maintain conversational flow an
- Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
- Do not generate anything after the reference section.

2. **Content & Grounding:**
**2. Content & Grounding:**
- Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.

3. **Formatting & Language:**
**3. Formatting & Language:**
- The response MUST be in the same language as the user query.
- Use Markdown for clear formatting (e.g., headings, bold, lists).
- The response should be presented in {response_type}.

4. **References Section Format:**
**4. References Section Format:**
- The References section should be under heading: `### References`
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
- The Document Title in the citation must retain its original language.
- Output each citation on an individual line
- Provide maximum of 5 most relevant citations.
- Do not generate footnotes section or any text after the references.

5. **Reference Section Example:**
**5. Reference Section Example:**
```
### References
* [1] Document Title One
@@ -253,26 +256,26 @@ Consider the conversation history if provided to maintain conversational flow an
* [3] Document Title Three
```

6. **Additional Instructions**: {user_prompt}
**6. Additional Instructions**: {user_prompt}


---Source Data---
Knowledge Graph and Document Chunks:

{context_data}

"""

PROMPTS["naive_rag_response"] = """---Role---

You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.

---Goal---

Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Document Chunks found in the `Source Data`.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.

---Instructions---
1. **Think Step-by-Step:**

**1. Think Step-by-Step:**
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize the `Source Data`(Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -280,24 +283,24 @@ Consider the conversation history if provided to maintain conversational flow an
- Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
- Do not generate anything after the reference section.

2. **Content & Grounding:**
**2. Content & Grounding:**
- Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.

3. **Formatting & Language:**
**3. Formatting & Language:**
- The response MUST be in the same language as the user query.
- Use Markdown for clear formatting (e.g., headings, bold, lists).
- The response should be presented in {response_type}.

4. **References Section Format:**
**4. References Section Format:**
- The References section should be under heading: `### References`
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
- Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`).
- The Document Title in the citation must retain its original language.
- Output each citation on an individual line
- Provide maximum of 5 most relevant citations.
- Do not generate footnotes section or any text after the references.

5. **Reference Section Example:**
**5. Reference Section Example:**
```
### References
* [1] Document Title One
@@ -305,16 +308,55 @@ Consider the conversation history if provided to maintain conversational flow an
* [3] Document Title Three
```

6. **Additional Instructions**: {user_prompt}
**6. Additional Instructions**: {user_prompt}


---Source Data---

Document Chunks:

{content_data}

"""

PROMPTS["kg_query_context"] = """
Entities Data From Knowledge Graph(KG):

```json
{entities_str}
```

Relationships Data From Knowledge Graph(KG):

```json
{relations_str}
```

Original Texts From Document Chunks(DC):

```json
{text_chunks_str}
```

Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])

{reference_list_str}

"""
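A usage sketch for the kg_query_context template above, with made-up entity, relation, and chunk records (the field names are illustrative; in the query code the *_str values are built by joining json.dumps output, as the hunks in the first file show):

import json

entities = [{"entity": "Alan Turing", "type": "person"}]
relations = [{"src": "Alan Turing", "tgt": "Enigma", "description": "worked on breaking"}]
chunks = [{"reference_id": 1, "content": "Turing worked at Bletchley Park."}]

kg_context = PROMPTS["kg_query_context"].format(
    entities_str="\n".join(json.dumps(e, ensure_ascii=False) for e in entities),
    relations_str="\n".join(json.dumps(r, ensure_ascii=False) for r in relations),
    text_chunks_str="\n".join(json.dumps(c, ensure_ascii=False) for c in chunks),
    reference_list_str="[1] Example Document",
)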
PROMPTS["naive_query_context"] = """
Original Texts From Document Chunks(DC):

```json
{text_chunks_str}
```

Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])

{reference_list_str}

"""
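And the corresponding sketch for the naive path, where the formatted context is passed straight into naive_rag_response as content_data, mirroring the naive_query hunk above (values illustrative):

context_content = PROMPTS["naive_query_context"].format(
    text_chunks_str='{"reference_id": 1, "content": "Turing worked at Bletchley Park."}',
    reference_list_str="[1] Example Document",
)
sys_prompt = PROMPTS["naive_rag_response"].format(
    response_type="Multiple Paragraphs",
    user_prompt="n/a",
    content_data=context_content,
)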
PROMPTS["keywords_extraction"] = """---Role---
You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval.