From 1bdd906753aabd835fe501e23e8c850336e16b00 Mon Sep 17 00:00:00 2001
From: clssck
Date: Mon, 1 Dec 2025 21:02:44 +0100
Subject: [PATCH] chore(lightrag): remove legacy prompts and clean up prompt.py

Remove unused LLM-generated citation prompts that were kept for backward
compatibility but never referenced in the codebase. Consolidate duplicate
instructions in the entity summarization prompt and fix minor typos.

- Remove rag_response_with_llm_citations prompt (dead code)
- Remove naive_rag_response_with_llm_citations prompt (dead code)
- Remove unused cite_ready_* backward compatibility aliases
- Consolidate duplicate context/objectivity instructions in summarize prompt
- Fix typo in example (extra parenthesis)
- Clarify delimiter documentation comment
---
 lightrag/prompt.py | 141 +++------------------------------------------
 1 file changed, 9 insertions(+), 132 deletions(-)

diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 2044caad..a3424a3c 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -4,7 +4,7 @@ from typing import Any

 PROMPTS: dict[str, Any] = {}

-# All delimiters must be formatted as "<|UPPER_CASE_STRING|>"
+# All delimiters must be formatted as "<|TOKEN|>" style markers (e.g., "<|#|>" or "<|COMPLETE|>")
 PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|#|>"
 PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"

@@ -16,7 +16,7 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel
 * **Identification:** Identify clearly defined and meaningful entities in the input text.
 * **Entity Details:** For each identified entity, extract the following information:
     * `entity_name`: The name of the entity. If the entity name is case-insensitive, capitalize the first letter of each significant word (title case). Ensure **consistent naming** across the entire extraction process.
-    * `entity_type`: Categorize the entity using one of the following types: `{entity_types}`. If none of the provided entity types apply, do not add new entity type and classify it as `Other`.
+    * `entity_type`: Categorize the entity using one of the following types: `{entity_types}`. If none of the provided types apply, do not invent a new type; classify it as `Other`.
     * `entity_description`: Provide a concise yet comprehensive description of the entity's attributes and activities, based *solely* on the information present in the input text.
 * **Output Format - Entities:** Output a total of 4 fields for each entity, delimited by `{tuple_delimiter}`, on a single line. The first field *must* be the literal string `entity`.
     * Format: `entity{tuple_delimiter}entity_name{tuple_delimiter}entity_type{tuple_delimiter}entity_description`
@@ -131,7 +131,7 @@ entity{tuple_delimiter}Jordan{tuple_delimiter}person{tuple_delimiter}Jordan shar
 entity{tuple_delimiter}Cruz{tuple_delimiter}person{tuple_delimiter}Cruz is associated with a vision of control and order, influencing the dynamics among other characters.
 entity{tuple_delimiter}The Device{tuple_delimiter}equipment{tuple_delimiter}The Device is central to the story, with potential game-changing implications, and is revered by Taylor.
 relation{tuple_delimiter}Alex{tuple_delimiter}Taylor{tuple_delimiter}power dynamics, observation{tuple_delimiter}Alex observes Taylor's authoritarian behavior and notes changes in Taylor's attitude toward the device.
-relation{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision.)
+relation{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision. relation{tuple_delimiter}Taylor{tuple_delimiter}Jordan{tuple_delimiter}conflict resolution, mutual respect{tuple_delimiter}Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce. relation{tuple_delimiter}Jordan{tuple_delimiter}Cruz{tuple_delimiter}ideological conflict, rebellion{tuple_delimiter}Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order. relation{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}reverence, technological significance{tuple_delimiter}Taylor shows reverence towards the device, indicating its importance and potential impact. @@ -199,18 +199,13 @@ Your task is to synthesize a list of descriptions of a given entity or relation 1. Input Format: The description list is provided in JSON format. Each JSON object (representing a single description) appears on a new line within the `Description List` section. 2. Output Format: The merged description will be returned as plain text, presented in multiple paragraphs, without any additional formatting or extraneous comments before or after the summary. 3. Comprehensiveness: The summary must integrate all key information from *every* provided description. Do not omit any important facts or details. -4. Context: Ensure the summary is written from an objective, third-person perspective; explicitly mention the name of the entity or relation for full clarity and context. -5. Context & Objectivity: - - Write the summary from an objective, third-person perspective. - - Explicitly mention the full name of the entity or relation at the beginning of the summary to ensure immediate clarity and context. -6. Conflict Handling: +4. Clarity: Write from an objective, third-person perspective and explicitly mention the full name of the entity or relation at the beginning for immediate context. +5. Conflict Handling: - In cases of conflicting or inconsistent descriptions, first determine if these conflicts arise from multiple, distinct entities or relationships that share the same name. - If distinct entities/relations are identified, summarize each one *separately* within the overall output. - If conflicts within a single entity/relation (e.g., historical discrepancies) exist, attempt to reconcile them or present both viewpoints with noted uncertainty. -7. Length Constraint:The summary's total length must not exceed {summary_length} tokens, while still maintaining depth and completeness. -8. Language: The entire output must be written in {language}. Proper nouns (e.g., personal names, place names, organization names) may in their original language if proper translation is not available. - - The entire output must be written in {language}. - - Proper nouns (e.g., personal names, place names, organization names) should be retained in their original language if a proper, widely accepted translation is not available or would cause ambiguity. +6. Length Constraint: The summary's total length must not exceed {summary_length} tokens while still maintaining depth and completeness. +7. Language: Write the entire output in {language}. Retain proper nouns (e.g., personal names, place names, organization names) in their original language if a clear, widely accepted translation is unavailable. 
---Input--- {description_type} Name: {description_name} @@ -263,63 +258,6 @@ STRICT GROUNDING: - Entity summaries for overview, Source Excerpts for precision """ -# Legacy prompt with LLM-generated citations (for backward compatibility) -PROMPTS["rag_response_with_llm_citations"] = """---Role--- - -You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**. - ----Goal--- - -Generate a comprehensive, well-structured answer to the user query. -The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the **Context**. -Consider the conversation history if provided to maintain conversational flow and avoid repeating information. - ----Instructions--- - -1. Step-by-Step Instruction: - - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need. - - Scrutinize both `Knowledge Graph Data` and `Document Chunks` in the **Context**. Identify and extract all pieces of information that are directly relevant to answering the user query. - - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information. - - Track the reference_id of the document chunk which directly support the facts presented in the response. Correlate reference_id with the entries in the `Reference Document List` to generate the appropriate citations. - - Generate a references section at the end of the response. Each reference document must directly support the facts presented in the response. - - Do not generate anything after the reference section. - -2. Content & Grounding: - - Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated. - - If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess. - - CRITICAL FOR FACTS: When stating specific facts (dates, numbers, names, statistics), you MUST verify each fact appears EXACTLY in the provided context. If a specific date or number is not explicitly stated in the context, say "the exact [year/number/date] is not specified in the available information" rather than guessing. - - When the question asks "which" or "who" or "how many", provide ONLY the direct answer with facts from context. Do not elaborate with information not explicitly in the context. - -3. Formatting & Language: - - The response MUST be in the same language as the user query. - - The response MUST utilize Markdown formatting for enhanced clarity and structure (e.g., headings, bold text, bullet points). - - The response should be presented in {response_type}. - -4. References Section Format: - - The References section should be under heading: `### References` - - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`). - - The Document Title in the citation must retain its original language. - - Output each citation on an individual line - - Provide maximum of 5 most relevant citations. - - Do not generate footnotes section or any comment, summary, or explanation after the references. - -5. 
Reference Section Example: -``` -### References - -- [1] Document Title One -- [2] Document Title Two -- [3] Document Title Three -``` - -6. Additional Instructions: {user_prompt} - - ----Context--- - -{context_data} -""" - # Default naive RAG response prompt - cite-ready (no LLM-generated citations) PROMPTS["naive_rag_response"] = """---Role--- @@ -356,67 +294,6 @@ Generate a comprehensive, well-structured answer to the user query using ONLY in {content_data} """ -# Legacy naive RAG prompt with LLM-generated citations (for backward compatibility) -PROMPTS["naive_rag_response_with_llm_citations"] = """---Role--- - -You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**. - ----Goal--- - -Generate a comprehensive, well-structured answer to the user query. -The answer must integrate relevant facts from the Document Chunks found in the **Context**. -Consider the conversation history if provided to maintain conversational flow and avoid repeating information. - ----Instructions--- - -1. Step-by-Step Instruction: - - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need. - - Scrutinize `Document Chunks` in the **Context**. Identify and extract all pieces of information that are directly relevant to answering the user query. - - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information. - - Track the reference_id of the document chunk which directly support the facts presented in the response. Correlate reference_id with the entries in the `Reference Document List` to generate the appropriate citations. - - Generate a **References** section at the end of the response. Each reference document must directly support the facts presented in the response. - - Do not generate anything after the reference section. - -2. Content & Grounding: - - Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated. - - If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess. - - CRITICAL FOR FACTS: When stating specific facts (dates, numbers, names, statistics), you MUST verify each fact appears EXACTLY in the provided context. If a specific date or number is not explicitly stated in the context, say "the exact [year/number/date] is not specified in the available information" rather than guessing. - - When the question asks "which" or "who" or "how many", provide ONLY the direct answer with facts from context. Do not elaborate with information not explicitly in the context. - -3. Formatting & Language: - - The response MUST be in the same language as the user query. - - The response MUST utilize Markdown formatting for enhanced clarity and structure (e.g., headings, bold text, bullet points). - - The response should be presented in {response_type}. - -4. References Section Format: - - The References section should be under heading: `### References` - - Reference list entries should adhere to the format: `* [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`). - - The Document Title in the citation must retain its original language. 
- - Output each citation on an individual line - - Provide maximum of 5 most relevant citations. - - Do not generate footnotes section or any comment, summary, or explanation after the references. - -5. Reference Section Example: -``` -### References - -- [1] Document Title One -- [2] Document Title Two -- [3] Document Title Three -``` - -6. Additional Instructions: {user_prompt} - - ----Context--- - -{content_data} -""" - -# Backward compatibility aliases - the default prompts are now cite-ready -PROMPTS["cite_ready_rag_response"] = PROMPTS["rag_response"] -PROMPTS["cite_ready_naive_rag_response"] = PROMPTS["naive_rag_response"] - PROMPTS["kg_query_context"] = """ ## Entity Summaries (use for definitions and general facts) @@ -442,7 +319,7 @@ PROMPTS["kg_query_context"] = """ """ PROMPTS["naive_query_context"] = """ -Document Chunks (Each entry has a reference_id refer to the `Reference Document List`): +Document Chunks (Each entry includes a reference_id that refers to the `Reference Document List`): ```json {text_chunks_str} @@ -466,7 +343,7 @@ Given a user query, your task is to extract two distinct types of keywords: ---Instructions & Constraints--- 1. **Output Format**: Your output MUST be a valid JSON object and nothing else. Do not include any explanatory text, markdown code fences (like ```json), or any other text before or after the JSON. It will be parsed directly by a JSON parser. -2. **Source of Truth**: All keywords must be explicitly derived from the user query, with both high-level and low-level keyword categories are required to contain content. +2. **Source of Truth**: Derive all keywords explicitly from the user query. Populate both keyword lists when the query contains meaningful content; if the query is trivial or nonsensical, return empty lists (see edge cases). 3. **Concise & Meaningful**: Keywords should be concise words or meaningful phrases. Prioritize multi-word phrases when they represent a single concept. For example, from "latest financial report of Apple Inc.", you should extract "latest financial report" and "Apple Inc." rather than "latest", "financial", "report", and "Apple". 4. **Handle Edge Cases**: For queries that are too simple, vague, or nonsensical (e.g., "hello", "ok", "asdfghjkl"), you must return a JSON object with empty lists for both keyword types.
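
Reviewer note (editor addition, kept after the diff so the patch above still applies): the delimiter comment and the output-format bullets in the first two hunks define a small line-oriented record syntax. The sketch below shows how such records might be split back into fields. It is a minimal illustration only; `parse_extraction_line` is a hypothetical helper, not a function that exists in lightrag. The entity field layout comes from the quoted "Output Format" bullet; the 5-field relation layout is inferred from the example lines.

```python
from typing import Optional

TUPLE_DELIMITER = "<|#|>"              # mirrors PROMPTS["DEFAULT_TUPLE_DELIMITER"]
COMPLETION_DELIMITER = "<|COMPLETE|>"  # mirrors PROMPTS["DEFAULT_COMPLETION_DELIMITER"]

def parse_extraction_line(line: str) -> Optional[dict]:
    """Split one extraction-output line into a record, or return None if malformed."""
    line = line.strip()
    if not line or line == COMPLETION_DELIMITER:
        return None
    fields = line.split(TUPLE_DELIMITER)
    # Entity records carry 4 fields (per the quoted "Output Format" bullet);
    # relation records in the examples carry 5.
    if fields[0] == "entity" and len(fields) == 4:
        return {"kind": "entity", "name": fields[1],
                "type": fields[2], "description": fields[3]}
    if fields[0] == "relation" and len(fields) == 5:
        return {"kind": "relation", "source": fields[1], "target": fields[2],
                "keywords": fields[3], "description": fields[4]}
    return None

# The relation line corrected in the third hunk splits into five fields:
record = parse_extraction_line(
    "relation<|#|>Alex<|#|>Jordan<|#|>shared goals, rebellion<|#|>"
    "Alex and Jordan share a commitment to discovery, which contrasts "
    "with Cruz's vision."
)
assert record is not None and record["kind"] == "relation"
assert record["keywords"] == "shared goals, rebellion"
```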
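On the removed `cite_ready_*` aliases: they pointed at the same objects as the default prompts, so any out-of-tree caller that still reads them migrates with a one-line key change. Hypothetical caller code, assuming direct access to the `PROMPTS` dict:

```python
from lightrag.prompt import PROMPTS

# was: PROMPTS["cite_ready_rag_response"]
rag_prompt = PROMPTS["rag_response"]
# was: PROMPTS["cite_ready_naive_rag_response"]
naive_prompt = PROMPTS["naive_rag_response"]
```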
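Finally, the last hunk tightens the keyword-extraction contract: a bare JSON object (no code fences or commentary), both keyword lists populated for meaningful queries, and empty lists for trivial or nonsensical ones. A defensive consumer might look like the sketch below; the key names `high_level_keywords` and `low_level_keywords` are assumptions for illustration, since the excerpt does not show the prompt's exact JSON schema.

```python
import json

def parse_keywords(raw: str) -> tuple[list[str], list[str]]:
    """Return (high_level, low_level) keyword lists; fail closed to empty lists."""
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # The prompt forbids fences and extra text, but a consumer should
        # still degrade gracefully rather than crash on a bad generation.
        return [], []
    if not isinstance(data, dict):
        return [], []
    high = data.get("high_level_keywords", [])  # assumed key name
    low = data.get("low_level_keywords", [])    # assumed key name
    if not isinstance(high, list) or not isinstance(low, list):
        return [], []
    return [str(k) for k in high], [str(k) for k in low]

# Edge case from instruction 4: trivial queries yield two empty lists.
assert parse_keywords('{"high_level_keywords": [], "low_level_keywords": []}') == ([], [])
```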