From 62aa0726a5144b90bbae957f0e14e298e16f93b7 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 21 Sep 2025 23:51:19 +0800 Subject: [PATCH 01/10] Fix conversation history handling when history_turns is 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Check history_turns > 0 before filtering • Prevent sending all conversation history to backend when history_turns is 0 --- lightrag_webui/src/features/RetrievalTesting.tsx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lightrag_webui/src/features/RetrievalTesting.tsx b/lightrag_webui/src/features/RetrievalTesting.tsx index c9781a41..20440332 100644 --- a/lightrag_webui/src/features/RetrievalTesting.tsx +++ b/lightrag_webui/src/features/RetrievalTesting.tsx @@ -261,10 +261,12 @@ export default function RetrievalTesting() { const queryParams = { ...state.querySettings, query: actualQuery, - conversation_history: prevMessages - .filter((m) => m.isError !== true) - .slice(-(state.querySettings.history_turns || 0) * 2) - .map((m) => ({ role: m.role, content: m.content })), + conversation_history: (state.querySettings.history_turns || 0) > 0 + ? prevMessages + .filter((m) => m.isError !== true) + .slice(-(state.querySettings.history_turns || 0) * 2) + .map((m) => ({ role: m.role, content: m.content })) + : [], ...(modeOverride ? 
{ mode: modeOverride } : {}) } From cff60295083c6a5f449755cf9d2869199d69a6dd Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 22 Sep 2025 00:09:27 +0800 Subject: [PATCH 02/10] Ensure COT tags are properly closed in all stream termination scenarios - Add COT closure after stream completion - Handle COT in exception scenarios - Add final safety check in finally block - Prevent unclosed thinking tags - Log COT closure failures --- lightrag/llm/openai.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 6d486afc..9f0708ca 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -317,6 +317,11 @@ async def openai_complete_if_cache( if content is None and reasoning_content is None: continue + # Ensure COT is properly closed if still active after stream ends + if enable_cot and cot_active: + yield "" + cot_active = False + # After streaming is complete, track token usage if token_tracker and final_chunk_usage: # Use actual usage from the API @@ -332,6 +337,16 @@ async def openai_complete_if_cache( elif token_tracker: logger.debug("No usage information available in streaming response") except Exception as e: + # Ensure COT is properly closed before handling exception + if enable_cot and cot_active: + try: + yield "" + cot_active = False + except Exception as close_error: + logger.warning( + f"Failed to close COT tag during exception handling: {close_error}" + ) + logger.error(f"Error in stream response: {str(e)}") # Try to clean up resources if possible if ( @@ -350,6 +365,16 @@ async def openai_complete_if_cache( await openai_async_client.close() raise finally: + # Final safety check for unclosed COT tags + if enable_cot and cot_active: + try: + yield "" + cot_active = False + except Exception as final_close_error: + logger.warning( + f"Failed to close COT tag in finally block: {final_close_error}" + ) + # Ensure resources are released even if no exception occurs if ( 
iteration_started From 9288ae17036d25d7e46472360b191811f5113c17 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 22 Sep 2025 01:01:39 +0800 Subject: [PATCH 03/10] Refactor COT parsing to handle multiple think blocks robustly --- .../src/features/RetrievalTesting.tsx | 131 +++++++++++++----- 1 file changed, 96 insertions(+), 35 deletions(-) diff --git a/lightrag_webui/src/features/RetrievalTesting.tsx b/lightrag_webui/src/features/RetrievalTesting.tsx index 20440332..0031c954 100644 --- a/lightrag_webui/src/features/RetrievalTesting.tsx +++ b/lightrag_webui/src/features/RetrievalTesting.tsx @@ -22,6 +22,66 @@ const generateUniqueId = () => { return `id-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`; }; +// Robust COT parsing function to handle multiple think blocks and edge cases +const parseCOTContent = (content: string) => { + const thinkStartTag = '' + const thinkEndTag = '' + + // Find all and tag positions + const startMatches: number[] = [] + const endMatches: number[] = [] + + let startIndex = 0 + while ((startIndex = content.indexOf(thinkStartTag, startIndex)) !== -1) { + startMatches.push(startIndex) + startIndex += thinkStartTag.length + } + + let endIndex = 0 + while ((endIndex = content.indexOf(thinkEndTag, endIndex)) !== -1) { + endMatches.push(endIndex) + endIndex += thinkEndTag.length + } + + // Analyze COT state + const hasThinkStart = startMatches.length > 0 + const hasThinkEnd = endMatches.length > 0 + const isThinking = hasThinkStart && (startMatches.length > endMatches.length) + + let thinkingContent = '' + let displayContent = content + + if (hasThinkStart) { + if (hasThinkEnd && startMatches.length === endMatches.length) { + // Complete thinking blocks: extract the last complete thinking content + const lastStartIndex = startMatches[startMatches.length - 1] + const lastEndIndex = endMatches[endMatches.length - 1] + + if (lastEndIndex > lastStartIndex) { + thinkingContent = content.substring( + lastStartIndex + 
thinkStartTag.length, + lastEndIndex + ).trim() + + // Remove all thinking blocks, keep only the final display content + displayContent = content.substring(lastEndIndex + thinkEndTag.length).trim() + } + } else if (isThinking) { + // Currently thinking: extract current thinking content + const lastStartIndex = startMatches[startMatches.length - 1] + thinkingContent = content.substring(lastStartIndex + thinkStartTag.length) + displayContent = '' + } + } + + return { + isThinking, + thinkingContent, + displayContent, + hasValidThinkBlock: hasThinkStart && hasThinkEnd && startMatches.length === endMatches.length + } +} + export default function RetrievalTesting() { const { t } = useTranslation() const [messages, setMessages] = useState(() => { @@ -178,40 +238,28 @@ export default function RetrievalTesting() { thinkingStartTime.current = Date.now() } - // Real-time parsing for streaming - const thinkStartTag = '' - const thinkEndTag = '' - const thinkStartIndex = assistantMessage.content.indexOf(thinkStartTag) - const thinkEndIndex = assistantMessage.content.indexOf(thinkEndTag) + // Use the new robust COT parsing function + const cotResult = parseCOTContent(assistantMessage.content) - if (thinkStartIndex !== -1) { - if (thinkEndIndex !== -1) { - // Thinking has finished for this chunk - assistantMessage.isThinking = false + // Update thinking state + assistantMessage.isThinking = cotResult.isThinking - // Only calculate time and extract thinking content once - if (!thinkingProcessed.current) { - if (thinkingStartTime.current && !assistantMessage.thinkingTime) { - const duration = (Date.now() - thinkingStartTime.current) / 1000 - assistantMessage.thinkingTime = parseFloat(duration.toFixed(2)) - } - assistantMessage.thinkingContent = assistantMessage.content - .substring(thinkStartIndex + thinkStartTag.length, thinkEndIndex) - .trim() - thinkingProcessed.current = true - } - - // Always update display content as content after may grow - assistantMessage.displayContent = 
assistantMessage.content.substring(thinkEndIndex + thinkEndTag.length).trim() - } else { - // Still thinking - update thinking content in real-time - assistantMessage.isThinking = true - assistantMessage.thinkingContent = assistantMessage.content.substring(thinkStartIndex + thinkStartTag.length) - assistantMessage.displayContent = '' + // Only calculate time and extract thinking content once when thinking is complete + if (cotResult.hasValidThinkBlock && !thinkingProcessed.current) { + if (thinkingStartTime.current && !assistantMessage.thinkingTime) { + const duration = (Date.now() - thinkingStartTime.current) / 1000 + assistantMessage.thinkingTime = parseFloat(duration.toFixed(2)) } + thinkingProcessed.current = true + } + + // Update content based on parsing results + assistantMessage.thinkingContent = cotResult.thinkingContent + // Only fallback to full content if not in a thinking state. + if (cotResult.isThinking) { + assistantMessage.displayContent = '' } else { - assistantMessage.isThinking = false - assistantMessage.displayContent = assistantMessage.content + assistantMessage.displayContent = cotResult.displayContent || assistantMessage.content } // Detect if the assistant message contains a complete mermaid code block @@ -297,17 +345,30 @@ export default function RetrievalTesting() { // Enhanced cleanup with error handling to prevent memory leaks try { - // Final calculation for thinking time, only if not already calculated - if (assistantMessage.thinkingContent && thinkingStartTime.current && !assistantMessage.thinkingTime) { + // Final COT state validation and cleanup + const finalCotResult = parseCOTContent(assistantMessage.content) + + // Force set final state - stream ended so thinking must be false + assistantMessage.isThinking = false + + // If we have a complete thinking block but time wasn't calculated, do final calculation + if (finalCotResult.hasValidThinkBlock && thinkingStartTime.current && !assistantMessage.thinkingTime) { const duration = 
(Date.now() - thinkingStartTime.current) / 1000 assistantMessage.thinkingTime = parseFloat(duration.toFixed(2)) } + + // Ensure display content is correctly set based on final parsing + if (finalCotResult.displayContent !== undefined) { + assistantMessage.displayContent = finalCotResult.displayContent + } + } catch (error) { - console.error('Error calculating thinking time:', error) + console.error('Error in final COT state validation:', error) + // Force reset state on error + assistantMessage.isThinking = false } finally { // Ensure cleanup happens regardless of errors - assistantMessage.isThinking = false; - thinkingStartTime.current = null; + thinkingStartTime.current = null } // Save history with error handling From 8826d2f8928ab591caac0e09f82dcf270e5e439f Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 22 Sep 2025 01:04:57 +0800 Subject: [PATCH 04/10] Optimize prompt instruction for citation format --- lightrag/prompt.py | 47 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index 9dd7d279..463b35ad 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -224,8 +224,6 @@ Consider the conversation history if provided to maintain conversational flow an - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need. - Scrutinize the `Source Data`(both Knowledge Graph and Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query. - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information. - - Review the draft to ensure it strictly adheres to all `Formatting & Language` rules below before generating the final response. 
- - Append a reference section at the end of the response; merge citations that share the same file_path into one reference item. 2. **Content & Grounding:** - Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated. @@ -234,21 +232,21 @@ Consider the conversation history if provided to maintain conversational flow an 3. **Formatting & Language:** - The response MUST be in the same language as the user query. - Use Markdown for clear formatting (e.g., headings, bold, lists). - - Target format and length: {response_type} + - The response should be presented in {response_type}. - Append a reference section at the end of the response. - Generate a Only output citation information in the references section - The main body of the response should exclude inline citations; all references should be listed exclusively in the references section at the end. + - Merge citations that share the same file_path into one reference item. + - The main body of the response should exclude inline citations; all citation information should be listed exclusively in the references section. 4. **Reference/Citation Format:** - Append a reference section at the end of the response. - - Provide a maximum of 8 unique and most relevant references, and list each citation on a separate line. - - Merge citations that share the same file_path into one reference item, disregarding their distinct IDs. - - Preserve the original file_path language in citation - The References section should be under a `### References` heading. - Output the citation in the following formats: - - For a Knowledge Graph Entity: `[EN] ` - - For a Knowledge Graph Relationship: `RE] ~ ` - - For a Document Chunk: `[DC] ` + - For a Knowledge Graph Entity: [EN] + - For a Knowledge Graph Relationship: [RE] ~ + - For a Document Chunk: [DC] + - , , , and should be derived from the attribute values in `Source Data` and preserved in their original language. 
+ - Merge citations that share the same file_path into one reference item, disregarding their distinct IDs. + - Provide a maximum of 8 unique and most relevant references, and list each citation on a separate line. 5. **Example of Section:** ``` @@ -256,9 +254,9 @@ Consider the conversation history if provided to maintain conversational flow an - [EN] LightRAG - [EN] Dual-Level Retrieval System - [RE] LightRAG ~ GraphRAG -- [DC1] Simple and Fast RAG.pdf -- [DC2] LightRAG Simple and Fast Alternative to GraphRAG for Legal Doc Analysis.md -- [DC3] Microsoft GraphRAG Technology Summary.md +- [DC] Simple and Fast RAG.pdf +- [DC] LightRAG Simple and Fast Alternative to GraphRAG for Legal Doc Analysis.md +- [DC] Microsoft GraphRAG Technology Summary.md ``` ---Source Data--- @@ -281,8 +279,6 @@ Consider the conversation history if provided to maintain conversational flow an - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need. - Scrutinize the `Source Data`(Document Chunks). Identify and extract all pieces of information that are directly relevant to answering the user query. - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information. - - Review the draft to ensure it strictly adheres to all `Formatting & Language` rules below before generating the final response. - - Append a reference section at the end of the response; merge citations that share the same file_path into one reference item. 2. **Content & Grounding:** - Strictly adhere to the provided context from the `Source Data`; DO NOT invent, assume, or infer any information not explicitly stated. @@ -291,23 +287,24 @@ Consider the conversation history if provided to maintain conversational flow an 3. **Formatting & Language:** - The response MUST be in the same language as the user query. 
- Use Markdown for clear formatting (e.g., headings, bold, lists). - - Target format and length: {response_type} - - The main body of the response should exclude inline citations; all references should be listed exclusively in the references section at the end. + - The response should be presented in {response_type}. + - Append a reference section at the end of the response. + - The main body of the response should exclude inline citations; all citation information should be listed exclusively in the references section. 4. **Reference/Citation Format:** - Append a reference section at the end of the response. - - Provide a maximum of 8 unique and most relevant references, and list each citation on a separate line. - - Merge citations that share the same file_path into one reference item, disregarding their distinct IDs. - - Preserve the original file_path language in citation - The References section should be under a `### References` heading. - - Output the citation in the following format: `[DC] ` + - Output the citation in the following format: [DC] + - should be derived from the attribute values in `Source Data` and preserved in their original language. + - Merge citations that share the same file_path into one reference item, disregarding their distinct IDs. + - Provide a maximum of 8 unique and most relevant references, and list each citation on a separate line. 5. 
**Example of Section:** ``` ### References -- [DC1] Simple and Fast RAG.pdf -- [DC2] LightRAG Simple and Fast Alternative to GraphRAG for Legal Doc Analysis.md -- [DC3] Microsoft GraphRAG Technology Summary.md +- [DC] Simple and Fast RAG.pdf +- [DC] LightRAG Simple and Fast Alternative to GraphRAG for Legal Doc Analysis.md +- [DC] Microsoft GraphRAG Technology Summary.md ``` ---Source Data--- From 17dd56e41c06e7c6d09a899fa0a294f8b1c5d818 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 22 Sep 2025 01:41:58 +0800 Subject: [PATCH 05/10] Improve code highlighting with context-aware styling and inline detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add messageRole prop to CodeHighlight • Remove unused Element type import • Replace node-based inline detection • Add theme-aware inline code styles • Update dependency arrays in useMemo --- .../src/components/retrieval/ChatMessage.tsx | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/lightrag_webui/src/components/retrieval/ChatMessage.tsx b/lightrag_webui/src/components/retrieval/ChatMessage.tsx index d04686a5..9404848b 100644 --- a/lightrag_webui/src/components/retrieval/ChatMessage.tsx +++ b/lightrag_webui/src/components/retrieval/ChatMessage.tsx @@ -10,7 +10,6 @@ import rehypeReact from 'rehype-react' import remarkMath from 'remark-math' import mermaid from 'mermaid' -import type { Element } from 'hast' import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter' import { oneLight, oneDark } from 'react-syntax-highlighter/dist/cjs/styles/prism' @@ -85,6 +84,7 @@ export const ChatMessage = ({ message }: { message: MessageWithError }) => { // ), p: ({ children }: { children?: ReactNode }) =>

{children}

, @@ -95,11 +95,11 @@ export const ChatMessage = ({ message }: { message: MessageWithError }) => { // ul: ({ children }: { children?: ReactNode }) =>
    {children}
, ol: ({ children }: { children?: ReactNode }) =>
    {children}
, li: ({ children }: { children?: ReactNode }) =>
  • {children}
  • - }), [message.mermaidRendered]); + }), [message.mermaidRendered, message.role]); const thinkingMarkdownComponents = useMemo(() => ({ - code: (props: any) => () - }), [message.mermaidRendered]); + code: (props: any) => () + }), [message.mermaidRendered, message.role]); return (
    { - if (!node || !node.children) return false; - const textContent = node.children - .filter((child) => child.type === 'text') - .map((child) => (child as any).value) - .join(''); - // Consider inline if it doesn't contain newline or is very short - return !textContent.includes('\n') || textContent.length < 40; -}; // Check if it is a large JSON @@ -231,12 +221,11 @@ const isLargeJson = (language: string | undefined, content: string | undefined): }; // Memoize the CodeHighlight component -const CodeHighlight = memo(({ className, children, node, renderAsDiagram = false, ...props }: CodeHighlightProps) => { +const CodeHighlight = memo(({ inline, className, children, renderAsDiagram = false, messageRole, ...props }: CodeHighlightProps) => { const { theme } = useTheme(); const [hasRendered, setHasRendered] = useState(false); // State to track successful render const match = className?.match(/language-(\w+)/); const language = match ? match[1] : undefined; - const inline = isInlineCode(node); // Use the helper function const mermaidRef = useRef(null); const debounceTimerRef = useRef | null>(null); // Use ReturnType for better typing @@ -401,20 +390,45 @@ const CodeHighlight = memo(({ className, children, node, renderAsDiagram = false } + // ReactMarkdown determines inline vs block based on markdown syntax + // Inline code: `code` (no className with language) + // Block code: ```language (has className like "language-js") + // If there's no language className and no explicit inline prop, it's likely inline code + const isInline = inline ?? !className?.startsWith('language-'); + + // Generate dynamic inline code styles based on message role and theme + const getInlineCodeStyles = () => { + if (messageRole === 'user') { + // User messages have dark background (bg-primary), need light inline code + return theme === 'dark' + ? 
'bg-primary-foreground/20 text-primary-foreground border border-primary-foreground/30' + : 'bg-primary-foreground/20 text-primary-foreground border border-primary-foreground/30'; + } else { + // Assistant messages have light background (bg-muted), need contrasting inline code + return theme === 'dark' + ? 'bg-muted-foreground/20 text-muted-foreground border border-muted-foreground/30' + : 'bg-slate-200 text-slate-800 border border-slate-300'; + } + }; + // Handle non-Mermaid code blocks - return !inline ? ( + return !isInline ? ( {contentStr} ) : ( - // Handle inline code + // Handle inline code with context-aware styling {children} From d2029fd80435b6ffbf43ee21d5db285e8365e111 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 22 Sep 2025 02:01:39 +0800 Subject: [PATCH 06/10] Replace Input with auto-resizing Textarea in RetrievalTesting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add new Textarea component • Auto-resize between 40px-120px height • Support Enter to submit, Shift+Enter for newline • Add form autocomplete attributes • Reset height after message submission --- lightrag_webui/src/components/ui/Textarea.tsx | 25 +++++++++++++ .../src/features/RetrievalTesting.tsx | 37 ++++++++++++++++--- 2 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 lightrag_webui/src/components/ui/Textarea.tsx diff --git a/lightrag_webui/src/components/ui/Textarea.tsx b/lightrag_webui/src/components/ui/Textarea.tsx new file mode 100644 index 00000000..04a061a8 --- /dev/null +++ b/lightrag_webui/src/components/ui/Textarea.tsx @@ -0,0 +1,25 @@ +import * as React from 'react' +import { cn } from '@/lib/utils' + +export interface TextareaProps + extends React.TextareaHTMLAttributes { + className?: string +} + +const Textarea = React.forwardRef( + ({ className, ...props }, ref) => { + return ( +