From 7074de5ba74ac486e68c42416b810ecaf08c3f67 Mon Sep 17 00:00:00 2001
From: Eric Hare <ericrhare@gmail.com>
Date: Tue, 9 Dec 2025 10:07:21 -0800
Subject: [PATCH] fix: Tool calling for granite 3.3

---
 frontend/hooks/useChatStreaming.ts | 72 ++++++++++++++++++++++++++++++
 src/agent.py                       | 62 +++++++++++++++++++++++++
 2 files changed, 134 insertions(+)

diff --git a/frontend/hooks/useChatStreaming.ts b/frontend/hooks/useChatStreaming.ts
index c67a0ca6..89d0d810 100644
--- a/frontend/hooks/useChatStreaming.ts
+++ b/frontend/hooks/useChatStreaming.ts
@@ -162,6 +162,19 @@ export function useChatStreaming({
             if (line.trim()) {
               try {
                 const chunk = JSON.parse(line);
+                
+                // Investigation logging for Granite 3.3 8b tool call detection
+                const chunkKeys = Object.keys(chunk);
+                const toolRelatedKeys = chunkKeys.filter(key => 
+                  key.toLowerCase().includes('tool') || 
+                  key.toLowerCase().includes('call') || 
+                  key.toLowerCase().includes('retrieval') ||
+                  key.toLowerCase().includes('function') ||
+                  key.toLowerCase().includes('result')
+                );
+                if (toolRelatedKeys.length > 0) {
+                  console.log('[Tool Detection] Found tool-related keys:', toolRelatedKeys, chunk);
+                }
 
                 // Extract response ID if present
                 if (chunk.id) {
@@ -449,6 +462,42 @@ export function useChatStreaming({
                     }
                   }
                 }
+                
+                // Heuristic detection for implicit tool calls (Granite 3.3 8b workaround)
+                // Check if chunk contains retrieval results without explicit tool call markers
+                const hasImplicitToolCall = (
+                  // Check for various result indicators in the chunk
+                  (chunk.results && Array.isArray(chunk.results) && chunk.results.length > 0) ||
+                  (chunk.outputs && Array.isArray(chunk.outputs) && chunk.outputs.length > 0) ||
+                  // Check for retrieval-related fields
+                  chunk.retrieved_documents ||
+                  chunk.retrieval_results ||
+                  // Check for nested data structures that might contain results
+                  (chunk.data && typeof chunk.data === 'object' && (
+                    chunk.data.results || 
+                    chunk.data.retrieved_documents ||
+                    chunk.data.retrieval_results
+                  ))
+                );
+                
+                if (hasImplicitToolCall && currentFunctionCalls.length === 0) {
+                  console.log('[Heuristic Detection] Detected implicit tool call:', chunk);
+                  
+                  // Take the first populated result field, covering every field the detection above accepts
+                  const results = chunk.results || chunk.outputs || chunk.retrieved_documents ||
+                                 chunk.retrieval_results || chunk.data?.results ||
+                                 chunk.data?.retrieved_documents || chunk.data?.retrieval_results || [];
+                  
+                  const syntheticFunctionCall: FunctionCall = {
+                    name: "Retrieval",
+                    arguments: { implicit: true, detected_heuristically: true },
+                    status: "completed",
+                    type: "retrieval_call",
+                    result: results,
+                  };
+                  currentFunctionCalls.push(syntheticFunctionCall);
+                  console.log('[Heuristic Detection] Created synthetic function call');
+                }
 
                 // Update streaming message in real-time
                 if (
@@ -486,6 +535,29 @@ export function useChatStreaming({
           "No response received from the server. Please try again.",
         );
       }
+      
+      // Post-processing: Heuristic detection based on final content
+      // If no explicit tool call was detected but the content shows RAG indicators, record a synthetic one
+      if (currentFunctionCalls.length === 0 && currentContent) {
+        // Check for citation patterns that indicate RAG usage
+        const hasCitations = /\(Source:|\[Source:|\bSource:|filename:|document:/i.test(currentContent);
+        // Check for common RAG response patterns
+        const hasRAGPattern = /based on.*(?:document|file|information|data)|according to.*(?:document|file)/i.test(currentContent);
+        
+        if (hasCitations || hasRAGPattern) {
+          console.log('[Post-Processing] Detected RAG usage from content patterns');
+          const syntheticFunctionCall: FunctionCall = {
+            name: "Retrieval",
+            arguments: { 
+              implicit: true, 
+              detected_from: hasCitations ? "citations" : "content_patterns"
+            },
+            status: "completed",
+            type: "retrieval_call",
+          };
+          currentFunctionCalls.push(syntheticFunctionCall);
+        }
+      }
 
       // Finalize the message
       const finalMessage: Message = {
diff --git a/src/agent.py b/src/agent.py
index dd092643..278386e5 100644
--- a/src/agent.py
+++ b/src/agent.py
@@ -135,6 +135,7 @@ async def async_response_stream(
 
         full_response = ""
         chunk_count = 0
+        detected_tool_call = False  # Track if we've detected a tool call
         async for chunk in response:
             chunk_count += 1
             logger.debug(
@@ -158,6 +159,17 @@ async def async_response_stream(
                 else:
                     delta_text = str(chunk.delta)
                 full_response += delta_text
+            
+            # Enhanced logging for tool call detection (Granite 3.3 8b investigation)
+            chunk_attrs = dir(chunk) if hasattr(chunk, '__dict__') else []
+            tool_related_attrs = [attr for attr in chunk_attrs if 'tool' in attr.lower() or 'call' in attr.lower() or 'retrieval' in attr.lower()]
+            if tool_related_attrs:
+                logger.info(
+                    "Tool-related attributes found in chunk",
+                    chunk_count=chunk_count,
+                    attributes=tool_related_attrs,
+                    chunk_type=type(chunk).__name__
+                )
 
             # Send the raw event as JSON followed by newline for easy parsing
             try:
@@ -169,7 +181,57 @@ async def async_response_stream(
                     chunk_data = chunk.__dict__
                 else:
                     chunk_data = str(chunk)
+                
+                # Log detailed chunk structure for investigation (especially for Granite 3.3 8b)
+                if isinstance(chunk_data, dict):
+                    # Check for any fields that might indicate tool usage
+                    potential_tool_fields = {
+                        k: v for k, v in chunk_data.items() 
+                        if any(keyword in str(k).lower() for keyword in ['tool', 'call', 'retrieval', 'function', 'result', 'output'])
+                    }
+                    if potential_tool_fields:
+                        logger.info(
+                            "Potential tool-related fields in chunk",
+                            chunk_count=chunk_count,
+                            fields=list(potential_tool_fields.keys()),
+                            sample_data=str(potential_tool_fields)[:500]
+                        )
 
+                # Middleware: Detect implicit tool calls and inject standardized events
+                # This helps Granite 3.3 8b and other models that don't emit standard markers
+                if isinstance(chunk_data, dict) and not detected_tool_call:
+                    # Check if this chunk contains retrieval results
+                    has_results = any([
+                        'results' in chunk_data and isinstance(chunk_data.get('results'), list),
+                        'outputs' in chunk_data and isinstance(chunk_data.get('outputs'), list),
+                        'retrieved_documents' in chunk_data,
+                        'retrieval_results' in chunk_data,
+                    ])
+                    
+                    if has_results:
+                        logger.info(
+                            "Detected implicit tool call in backend, injecting synthetic event",
+                            chunk_fields=list(chunk_data.keys())
+                        )
+                        # Inject a synthetic tool call event before this chunk
+                        synthetic_event = {
+                            "type": "response.output_item.done",
+                            "item": {
+                                "type": "retrieval_call",
+                                "id": f"synthetic_{chunk_count}",
+                                "name": "Retrieval",
+                                "tool_name": "Retrieval",
+                                "status": "completed",
+                                "inputs": {"implicit": True, "backend_detected": True},
+                                "results": chunk_data.get('results') or chunk_data.get('outputs') or 
+                                         chunk_data.get('retrieved_documents') or 
+                                         chunk_data.get('retrieval_results') or []
+                            }
+                        }
+                        # Send the synthetic event first
+                        yield (json.dumps(synthetic_event, default=str) + "\n").encode("utf-8")
+                        detected_tool_call = True  # Mark that we've injected a tool call
+                
                 yield (json.dumps(chunk_data, default=str) + "\n").encode("utf-8")
             except Exception as e:
                 # Fallback to string representation