diff --git a/lightrag/operate.py b/lightrag/operate.py
index dc4ae1df..0f9e8edf 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -2389,14 +2389,19 @@ async def kg_query(
             content=context_result.context, raw_data=context_result.raw_data
         )

+    user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
+    response_type = (
+        query_param.response_type
+        if query_param.response_type
+        else "Multiple Paragraphs"
+    )
+
     # Build system prompt
     sys_prompt_temp = system_prompt if system_prompt else PROMPTS["rag_response"]
     sys_prompt = sys_prompt_temp.format(
-        user_prompt=f"```\n{query_param.user_prompt}\n```"
-        if query_param.user_prompt
-        else "n/a",
+        response_type=response_type,
+        user_prompt=user_prompt,
         context_data=context_result.context,
-        response_type=query_param.response_type,
     )

     user_query = query
@@ -3152,108 +3157,78 @@ async def _build_llm_context(
         global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
     )

+    # Get the system prompt template from PROMPTS or global_config
+    sys_prompt_template = global_config.get(
+        "system_prompt_template", PROMPTS["rag_response"]
+    )
+
+    kg_context_template = PROMPTS["kg_query_context"]
+    user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+    response_type = (
+        query_param.response_type
+        if query_param.response_type
+        else "Multiple Paragraphs"
+    )
+
+    entities_str = "\n".join(
+        json.dumps(entity, ensure_ascii=False) for entity in entities_context
+    )
+    relations_str = "\n".join(
+        json.dumps(relation, ensure_ascii=False) for relation in relations_context
+    )
+
+    # Calculate preliminary kg context tokens
+    pre_kg_context = kg_context_template.format(
+        entities_str=entities_str,
+        relations_str=relations_str,
+        text_chunks_str="",
+        reference_list_str="",
+    )
+    kg_context_tokens = len(tokenizer.encode(pre_kg_context))
+
+    # Calculate preliminary system prompt tokens
+    pre_sys_prompt = sys_prompt_template.format(
+        context_data="",  # Empty for overhead calculation
+        response_type=response_type,
+        user_prompt=user_prompt,
+    )
+    sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
+
+    # Calculate available tokens for text chunks
+    query_tokens = len(tokenizer.encode(query))
+    buffer_tokens = 200  # reserved for reference list and safety buffer
+    available_chunk_tokens = max_total_tokens - (
+        sys_prompt_tokens + kg_context_tokens + query_tokens + buffer_tokens
+    )
+
+    logger.debug(
+        f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+    )
+
+    # Apply token truncation to chunks using the dynamic limit
+    truncated_chunks = await process_chunks_unified(
+        query=query,
+        unique_chunks=merged_chunks,
+        query_param=query_param,
+        global_config=global_config,
+        source_type=query_param.mode,
+        chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
+    )
+
+    # Generate reference list from truncated chunks using the new common function
+    reference_list, truncated_chunks = generate_reference_list_from_chunks(
+        truncated_chunks
+    )
+
+    # Rebuild text_units_context with truncated chunks
+    # The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
     text_units_context = []
-    truncated_chunks = []
-
-    if merged_chunks:
-        # Calculate dynamic token limit for text chunks
-        entities_str = "\n".join(
-            json.dumps(entity, ensure_ascii=False) for entity in entities_context
-        )
-        relations_str = "\n".join(
-            json.dumps(relation, ensure_ascii=False) for relation in relations_context
-        )
-
-        # Calculate base context tokens (entities + relations + template)
-        kg_context_template = """-----Entities(KG)-----
-
-```json
-{entities_str}
-```
-
------Relationships(KG)-----
-
-```json
-{relations_str}
-```
-
------Document Chunks(DC)-----
-
-```json
-```
-
------Refrence Document List-----
-
-The reference documents list in Document Chunks(DC) is as follows (reference_id in square brackets):
-
-"""
-        kg_context = kg_context_template.format(
-            entities_str=entities_str, relations_str=relations_str
-        )
-        kg_context_tokens = len(tokenizer.encode(kg_context))
-
-        # Calculate system prompt template overhead
-        user_prompt = query_param.user_prompt if query_param.user_prompt else ""
-        response_type = (
-            query_param.response_type
-            if query_param.response_type
-            else "Multiple Paragraphs"
-        )
-
-        # Get the system prompt template from PROMPTS or global_config
-        sys_prompt_template = global_config.get(
-            "system_prompt_template", PROMPTS["rag_response"]
-        )
-
-        # Create sample system prompt for overhead calculation
-        sample_sys_prompt = sys_prompt_template.format(
-            context_data="",  # Empty for overhead calculation
-            response_type=response_type,
-            user_prompt=user_prompt,
-        )
-        sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
-
-        # Total system prompt overhead = template + query tokens
-        query_tokens = len(tokenizer.encode(query))
-        sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
-
-        buffer_tokens = 100  # Safety buffer as requested
-
-        # Calculate available tokens for text chunks
-        used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
-        available_chunk_tokens = max_total_tokens - used_tokens
-
-        logger.debug(
-            f"Token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
-        )
-
-        # Apply token truncation to chunks using the dynamic limit
-        truncated_chunks = await process_chunks_unified(
-            query=query,
-            unique_chunks=merged_chunks,
-            query_param=query_param,
-            global_config=global_config,
-            source_type=query_param.mode,
-            chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
-        )
-
-        # Generate reference list from truncated chunks using the new common function
-        reference_list, truncated_chunks = generate_reference_list_from_chunks(
-            truncated_chunks
-        )
-
-        # Rebuild text_units_context with truncated chunks
-        # The actual tokens may be slightly less than available_chunk_tokens due to deduplication logic
-        for i, chunk in enumerate(truncated_chunks):
-            text_units_context.append(
-                {
-                    "reference_id": chunk["reference_id"],
-                    "content": chunk["content"],
-                }
-            )
-
-        logger.debug(
-            f"Final chunk processing: {len(merged_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
+    for i, chunk in enumerate(truncated_chunks):
+        text_units_context.append(
+            {
+                "reference_id": chunk["reference_id"],
+                "content": chunk["content"],
+            }
         )

     logger.info(
@@ -3292,12 +3267,6 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
     if chunk_tracking_log:
         logger.info(f"chunks S+F/O: {' '.join(chunk_tracking_log)}")

-    entities_str = "\n".join(
-        json.dumps(entity, ensure_ascii=False) for entity in entities_context
-    )
-    relations_str = "\n".join(
-        json.dumps(relation, ensure_ascii=False) for relation in relations_context
-    )
     text_units_str = "\n".join(
         json.dumps(text_unit, ensure_ascii=False) for text_unit in text_units_context
     )
@@ -3307,31 +3276,12 @@ The reference documents list in Document Chunks(DC) is as follows (reference_id
         if ref["reference_id"]
     )

-    result = f"""-----Entities(KG)-----
-
-```json
-{entities_str}
-```
-
------Relationships(KG)-----
-
-```json
-{relations_str}
-```
-
------Document Chunks(DC)-----
-
-```json
-{text_units_str}
-```
-
------Refrence Document List-----
-
-Document Chunks (DC) reference documents : (Each entry begins with [reference_id])
-
-{reference_list_str}
-
-"""
+    result = kg_context_template.format(
+        entities_str=entities_str,
+        relations_str=relations_str,
+        text_chunks_str=text_units_str,
+        reference_list_str=reference_list_str,
+    )

     # Always return both context and complete data structure (unified approach)
     logger.debug(
@@ -3416,11 +3366,7 @@ async def _build_query_context(
         query_embedding=search_result["query_embedding"],
     )

-    if (
-        not merged_chunks
-        and not truncation_result["entities_context"]
-        and not truncation_result["relations_context"]
-    ):
+    if not merged_chunks:
         return None

     # Stage 4: Build final LLM context with dynamic token processing
@@ -4156,7 +4102,7 @@ async def naive_query(
     )

     # Calculate system prompt template tokens (excluding content_data)
-    user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+    user_prompt = f"\n\n{query_param.user_prompt}" if query_param.user_prompt else "n/a"
     response_type = (
         query_param.response_type
         if query_param.response_type
@@ -4168,26 +4114,23 @@
         system_prompt if system_prompt else PROMPTS["naive_rag_response"]
     )

-    # Create a sample system prompt with empty content_data to calculate overhead
-    sample_sys_prompt = sys_prompt_template.format(
-        content_data="",  # Empty for overhead calculation
+    # Create a preliminary system prompt with empty content_data to calculate overhead
+    pre_sys_prompt = sys_prompt_template.format(
         response_type=response_type,
         user_prompt=user_prompt,
+        content_data="",  # Empty for overhead calculation
     )
-    sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
-
-    # Total system prompt overhead = template + query tokens
-    query_tokens = len(tokenizer.encode(query))
-    sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
-
-    buffer_tokens = 100  # Safety buffer

     # Calculate available tokens for chunks
-    used_tokens = sys_prompt_overhead + buffer_tokens
-    available_chunk_tokens = max_total_tokens - used_tokens
+    sys_prompt_tokens = len(tokenizer.encode(pre_sys_prompt))
+    query_tokens = len(tokenizer.encode(query))
+    buffer_tokens = 200  # reserved for reference list and safety buffer
+    available_chunk_tokens = max_total_tokens - (
+        sys_prompt_tokens + query_tokens + buffer_tokens
+    )

     logger.debug(
-        f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        f"Naive query token allocation - Total: {max_total_tokens}, SysPrompt: {sys_prompt_tokens}, Query: {query_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
     )

     # Process chunks using unified processing with dynamic token limit
@@ -4247,29 +4190,19 @@
         if ref["reference_id"]
     )

-    context_content = f"""
----Document Chunks(DC)---
-
-```json
-{text_units_str}
-```
-
------Refrence Document List-----
-
-{reference_list_str}
-
-"""
+    naive_context_template = PROMPTS["naive_query_context"]
+    context_content = naive_context_template.format(
+        text_chunks_str=text_units_str,
+        reference_list_str=reference_list_str,
+    )

     if query_param.only_need_context and not query_param.only_need_prompt:
         return QueryResult(content=context_content, raw_data=raw_data)

-    sys_prompt_temp = system_prompt if system_prompt else PROMPTS["naive_rag_response"]
-    sys_prompt = sys_prompt_temp.format(
-        user_prompt=f"```\n{query_param.user_prompt}\n```"
-        if query_param.user_prompt
-        else "n/a",
-        content_data=text_units_str,
+    sys_prompt = sys_prompt_template.format(
         response_type=query_param.response_type,
+        user_prompt=user_prompt,
+        content_data=context_content,
     )

     user_query = query
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index c912f340..5ce46425 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -212,15 +212,18 @@ PROMPTS["fail_response"] = (
 )

 PROMPTS["rag_response"] = """---Role---
+
 You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.

 ---Goal---
+
 Generate a comprehensive, well-structured answer to the user query. The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the `Source Data`.
 Consider the conversation history if provided to maintain conversational flow and avoid repeating information.

 ---Instructions---
-1. **Step-by-Step Instruction:**
+
+**1. Step-by-Step Instruction:**
   - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
   - Scrutinize the `Source Data`(both Knowledge Graph and Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
   - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -228,24 +231,24 @@ Consider the conversation history if provided to maintain conversational flow an
   - Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
   - Do not generate anything after the reference section.

-2. **Content & Grounding:**
+**2. Content & Grounding:**
   - Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
   - If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.

-3. **Formatting & Language:**
+**3. Formatting & Language:**
   - The response MUST be in the same language as the user query.
   - Use Markdown for clear formatting (e.g., headings, bold, lists).
   - The response should be presented in {response_type}.

-4. **References Section Format:**
+**4. References Section Format:**
   - The References section should be under heading: `### References`
-  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
+  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after the opening square bracket (`[`).
   - The Document Title in the citation must retain its original language.
   - Output each citation on an individual line
   - Provide maximum of 5 most relevant citations.
   - Do not generate footnotes section or any text after the references.

-5. **Reference Section Example:**
+**5. Reference Section Example:**
 ```
 ### References
 * [1] Document Title One
@@ -253,26 +256,26 @@ Consider the conversation history if provided to maintain conversational flow an
 * [3] Document Title Three
 ```

-6. **Additional Instructions**: {user_prompt}
+**6. Additional Instructions**: {user_prompt}

 ---Source Data---

-Knowledge Graph and Document Chunks:
-
 {context_data}
-
 """

 PROMPTS["naive_rag_response"] = """---Role---
+
 You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided `Source Data`.

 ---Goal---
+
 Generate a comprehensive, well-structured answer to the user query. The answer must integrate relevant facts from the Document Chunks found in the `Source Data`.
 Consider the conversation history if provided to maintain conversational flow and avoid repeating information.

 ---Instructions---
-1. **Think Step-by-Step:**
+
+**1. Think Step-by-Step:**
   - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
   - Scrutinize the `Source Data`(Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query.
   - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.
@@ -280,24 +283,24 @@ Consider the conversation history if provided to maintain conversational flow an
   - Generate a reference section at the end of the response. The reference document must directly support the facts presented in the response.
   - Do not generate anything after the reference section.

-2. **Content & Grounding:**
+**2. Content & Grounding:**
   - Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated.
   - If the answer cannot be found in the `Source Data`, state that you do not have enough information to answer. Do not attempt to guess.

-3. **Formatting & Language:**
+**3. Formatting & Language:**
   - The response MUST be in the same language as the user query.
   - Use Markdown for clear formatting (e.g., headings, bold, lists).
   - The response should be presented in {response_type}.

-4. **References Section Format:**
+**4. References Section Format:**
   - The References section should be under heading: `### References`
-  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) immediately after the opening square bracket (`[`).
+  - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after the opening square bracket (`[`).
   - The Document Title in the citation must retain its original language.
   - Output each citation on an individual line
   - Provide maximum of 5 most relevant citations.
   - Do not generate footnotes section or any text after the references.

-5. **Reference Section Example:**
+**5. Reference Section Example:**
 ```
 ### References
 * [1] Document Title One
@@ -305,16 +308,55 @@ Consider the conversation history if provided to maintain conversational flow an
 * [3] Document Title Three
 ```

-6. **Additional Instructions**: {user_prompt}
+**6. Additional Instructions**: {user_prompt}

 ---Source Data---
+
 Document Chunks:

 {content_data}
 """

+PROMPTS["kg_query_context"] = """
+Entities Data From Knowledge Graph(KG):
+
+```json
+{entities_str}
+```
+
+Relationships Data From Knowledge Graph(KG):
+
+```json
+{relations_str}
+```
+
+Original Texts From Document Chunks(DC):
+
+```json
+{text_chunks_str}
+```
+
+Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
+
+{reference_list_str}
+
+"""
+
+PROMPTS["naive_query_context"] = """
+Original Texts From Document Chunks(DC):
+
+```json
+{text_chunks_str}
+```
+
+Document Chunks (DC) Reference Document List: (Each entry begins with [reference_id])
+
+{reference_list_str}
+
+"""
+
 PROMPTS["keywords_extraction"] = """---Role---

 You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval.