Refactor COT parsing to handle multiple think blocks robustly

2025-09-22 01:01:39 +08:00 · 2025-09-22 01:01:39 +08:00 · 9288ae1703
commit 9288ae1703
parent cff6029508
1 changed files with 96 additions and 35 deletions
--- a/lightrag_webui/src/features/RetrievalTesting.tsx
+++ b/lightrag_webui/src/features/RetrievalTesting.tsx
@ -22,6 +22,66 @@ const generateUniqueId = () => {
  return `id-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
 };

+// Parses chain-of-thought markup in `content`: locates every <think> / </think> tag and returns { isThinking, thinkingContent, displayContent, hasValidThinkBlock }
+const parseCOTContent = (content: string) => {
+  const thinkStartTag = '<think>'
+  const thinkEndTag = '</think>'
+
+  // Collect the offset of every <think> and </think> occurrence, scanning left to right
+  const startMatches: number[] = []
+  const endMatches: number[] = []
+
+  let startIndex = 0
+  while ((startIndex = content.indexOf(thinkStartTag, startIndex)) !== -1) {
+    startMatches.push(startIndex)
+    startIndex += thinkStartTag.length // resume the search just past this tag
+  }
+
+  let endIndex = 0
+  while ((endIndex = content.indexOf(thinkEndTag, endIndex)) !== -1) {
+    endMatches.push(endIndex)
+    endIndex += thinkEndTag.length // resume the search just past this tag
+  }
+
+  // Classify the state purely by tag counts: more opening than closing tags means a block is still open
+  const hasThinkStart = startMatches.length > 0
+  const hasThinkEnd = endMatches.length > 0
+  const isThinking = hasThinkStart && (startMatches.length > endMatches.length)
+
+  let thinkingContent = ''
+  let displayContent = content // default: the raw content, tags included, when no branch below reassigns it
+
+  if (hasThinkStart) {
+    if (hasThinkEnd && startMatches.length === endMatches.length) {
+      // Balanced tag counts: use the text between the last <think> and the last </think> as the thinking content
+      const lastStartIndex = startMatches[startMatches.length - 1]
+      const lastEndIndex = endMatches[endMatches.length - 1]
+
+      if (lastEndIndex > lastStartIndex) { // if the last closing tag precedes the last opening tag, the defaults above are kept
+        thinkingContent = content.substring(
+          lastStartIndex + thinkStartTag.length,
+          lastEndIndex
+        ).trim()
+
+        // Keep only the text after the last </think>; everything before it, including text between earlier blocks, is dropped
+        displayContent = content.substring(lastEndIndex + thinkEndTag.length).trim()
+      }
+    } else if (isThinking) {
+      // More opening than closing tags: everything after the last <think> is in-progress thinking (not trimmed) and nothing is displayed
+      const lastStartIndex = startMatches[startMatches.length - 1]
+      thinkingContent = content.substring(lastStartIndex + thinkStartTag.length)
+      displayContent = ''
+    }
+  } // NOTE(review): when closing tags outnumber opening tags neither branch runs, so displayContent keeps the raw tags — confirm this is intended
+
+  return {
+    isThinking,
+    thinkingContent,
+    displayContent,
+    hasValidThinkBlock: hasThinkStart && hasThinkEnd && startMatches.length === endMatches.length // true only when both tag kinds occur and their counts match
+  }
+}
+
 export default function RetrievalTesting() {
  const { t } = useTranslation()
  const [messages, setMessages] = useState<MessageWithError[]>(() => {
@ -178,40 +238,28 @@ export default function RetrievalTesting() {
          thinkingStartTime.current = Date.now()
        }

-        // Real-time parsing for streaming
-        const thinkStartTag = '<think>'
-        const thinkEndTag = '</think>'
-        const thinkStartIndex = assistantMessage.content.indexOf(thinkStartTag)
-        const thinkEndIndex = assistantMessage.content.indexOf(thinkEndTag)
+        // Use the new robust COT parsing function
+        const cotResult = parseCOTContent(assistantMessage.content)

-        if (thinkStartIndex !== -1) {
-          if (thinkEndIndex !== -1) {
-            // Thinking has finished for this chunk
-            assistantMessage.isThinking = false
+        // Update thinking state
+        assistantMessage.isThinking = cotResult.isThinking

-            // Only calculate time and extract thinking content once
-            if (!thinkingProcessed.current) {
-              if (thinkingStartTime.current && !assistantMessage.thinkingTime) {
-                const duration = (Date.now() - thinkingStartTime.current) / 1000
-                assistantMessage.thinkingTime = parseFloat(duration.toFixed(2))
-              }
-              assistantMessage.thinkingContent = assistantMessage.content
-                .substring(thinkStartIndex + thinkStartTag.length, thinkEndIndex)
-                .trim()
-              thinkingProcessed.current = true
-            }
-
-            // Always update display content as content after </think> may grow
-            assistantMessage.displayContent = assistantMessage.content.substring(thinkEndIndex + thinkEndTag.length).trim()
-          } else {
-            // Still thinking - update thinking content in real-time
-            assistantMessage.isThinking = true
-            assistantMessage.thinkingContent = assistantMessage.content.substring(thinkStartIndex + thinkStartTag.length)
-            assistantMessage.displayContent = ''
+        // Only calculate time and extract thinking content once when thinking is complete
+        if (cotResult.hasValidThinkBlock && !thinkingProcessed.current) {
+          if (thinkingStartTime.current && !assistantMessage.thinkingTime) {
+            const duration = (Date.now() - thinkingStartTime.current) / 1000
+            assistantMessage.thinkingTime = parseFloat(duration.toFixed(2))
          }
+          thinkingProcessed.current = true
+        }
+
+        // Update content based on parsing results
+        assistantMessage.thinkingContent = cotResult.thinkingContent
+        // Only fallback to full content if not in a thinking state.
+        if (cotResult.isThinking) {
+          assistantMessage.displayContent = ''
        } else {
-          assistantMessage.isThinking = false
-          assistantMessage.displayContent = assistantMessage.content
+          assistantMessage.displayContent = cotResult.displayContent || assistantMessage.content
        }

        // Detect if the assistant message contains a complete mermaid code block
@ -297,17 +345,30 @@ export default function RetrievalTesting() {

        // Enhanced cleanup with error handling to prevent memory leaks
        try {
-          // Final calculation for thinking time, only if not already calculated
-          if (assistantMessage.thinkingContent && thinkingStartTime.current && !assistantMessage.thinkingTime) {
+          // Final COT state validation and cleanup
+          const finalCotResult = parseCOTContent(assistantMessage.content)
+
+          // Force set final state - stream ended so thinking must be false
+          assistantMessage.isThinking = false
+
+          // If we have a complete thinking block but time wasn't calculated, do final calculation
+          if (finalCotResult.hasValidThinkBlock && thinkingStartTime.current && !assistantMessage.thinkingTime) {
            const duration = (Date.now() - thinkingStartTime.current) / 1000
            assistantMessage.thinkingTime = parseFloat(duration.toFixed(2))
          }
+
+          // Ensure display content is correctly set based on final parsing
+          if (finalCotResult.displayContent !== undefined) {
+            assistantMessage.displayContent = finalCotResult.displayContent
+          }
+
        } catch (error) {
-          console.error('Error calculating thinking time:', error)
+          console.error('Error in final COT state validation:', error)
+          // Force reset state on error
+          assistantMessage.isThinking = false
        } finally {
          // Ensure cleanup happens regardless of errors
-          assistantMessage.isThinking = false;
-          thinkingStartTime.current = null;
+          thinkingStartTime.current = null
        }

        // Save history with error handling