From 7074de5ba74ac486e68c42416b810ecaf08c3f67 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 9 Dec 2025 10:07:21 -0800 Subject: [PATCH] fix: Tool calling for granite 3.3 --- frontend/hooks/useChatStreaming.ts | 72 ++++++++++++++++++++++++++++++ src/agent.py | 62 +++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/frontend/hooks/useChatStreaming.ts b/frontend/hooks/useChatStreaming.ts index c67a0ca6..89d0d810 100644 --- a/frontend/hooks/useChatStreaming.ts +++ b/frontend/hooks/useChatStreaming.ts @@ -162,6 +162,19 @@ export function useChatStreaming({ if (line.trim()) { try { const chunk = JSON.parse(line); + + // Investigation logging for Granite 3.3 8b tool call detection + const chunkKeys = Object.keys(chunk); + const toolRelatedKeys = chunkKeys.filter(key => + key.toLowerCase().includes('tool') || + key.toLowerCase().includes('call') || + key.toLowerCase().includes('retrieval') || + key.toLowerCase().includes('function') || + key.toLowerCase().includes('result') + ); + if (toolRelatedKeys.length > 0) { + console.log('[Tool Detection] Found tool-related keys:', toolRelatedKeys, chunk); + } // Extract response ID if present if (chunk.id) { @@ -449,6 +462,42 @@ export function useChatStreaming({ } } } + + // Heuristic detection for implicit tool calls (Granite 3.3 8b workaround) + // Check if chunk contains retrieval results without explicit tool call markers + const hasImplicitToolCall = ( + // Check for various result indicators in the chunk + (chunk.results && Array.isArray(chunk.results) && chunk.results.length > 0) || + (chunk.outputs && Array.isArray(chunk.outputs) && chunk.outputs.length > 0) || + // Check for retrieval-related fields + chunk.retrieved_documents || + chunk.retrieval_results || + // Check for nested data structures that might contain results + (chunk.data && typeof chunk.data === 'object' && ( + chunk.data.results || + chunk.data.retrieved_documents || + chunk.data.retrieval_results + )) + ); + + if (hasImplicitToolCall && currentFunctionCalls.length === 0) { + console.log('[Heuristic Detection] Detected implicit tool call:', chunk); + + // Create a synthetic function call for the UI + const results = chunk.results || chunk.outputs || chunk.retrieved_documents || + chunk.retrieval_results || chunk.data?.results || + chunk.data?.retrieved_documents || []; + + const syntheticFunctionCall: FunctionCall = { + name: "Retrieval", + arguments: { implicit: true, detected_heuristically: true }, + status: "completed", + type: "retrieval_call", + result: results, + }; + currentFunctionCalls.push(syntheticFunctionCall); + console.log('[Heuristic Detection] Created synthetic function call'); + } // Update streaming message in real-time if ( @@ -486,6 +535,29 @@ export function useChatStreaming({ "No response received from the server. Please try again.", ); } + + // Post-processing: Heuristic detection based on final content + // If no explicit tool calls detected but content shows RAG indicators + if (currentFunctionCalls.length === 0 && currentContent) { + // Check for citation patterns that indicate RAG usage + const hasCitations = /\(Source:|\[Source:|\bSource:|filename:|document:/i.test(currentContent); + // Check for common RAG response patterns + const hasRAGPattern = /based on.*(?:document|file|information|data)|according to.*(?:document|file)/i.test(currentContent); + + if (hasCitations || hasRAGPattern) { + console.log('[Post-Processing] Detected RAG usage from content patterns'); + const syntheticFunctionCall: FunctionCall = { + name: "Retrieval", + arguments: { + implicit: true, + detected_from: hasCitations ? "citations" : "content_patterns" + }, + status: "completed", + type: "retrieval_call", + }; + currentFunctionCalls.push(syntheticFunctionCall); + } + } // Finalize the message const finalMessage: Message = { diff --git a/src/agent.py b/src/agent.py index dd092643..278386e5 100644 --- a/src/agent.py +++ b/src/agent.py @@ -135,6 +135,7 @@ async def async_response_stream( full_response = "" chunk_count = 0 + detected_tool_call = False # Track if we've detected a tool call async for chunk in response: chunk_count += 1 logger.debug( @@ -158,6 +159,17 @@ async def async_response_stream( else: delta_text = str(chunk.delta) full_response += delta_text + + # Enhanced logging for tool call detection (Granite 3.3 8b investigation) + chunk_attrs = dir(chunk) if hasattr(chunk, '__dict__') else [] + tool_related_attrs = [attr for attr in chunk_attrs if 'tool' in attr.lower() or 'call' in attr.lower() or 'retrieval' in attr.lower()] + if tool_related_attrs: + logger.info( + "Tool-related attributes found in chunk", + chunk_count=chunk_count, + attributes=tool_related_attrs, + chunk_type=type(chunk).__name__ + ) # Send the raw event as JSON followed by newline for easy parsing try: @@ -169,7 +181,57 @@ async def async_response_stream( chunk_data = chunk.__dict__ else: chunk_data = str(chunk) + + # Log detailed chunk structure for investigation (especially for Granite 3.3 8b) + if isinstance(chunk_data, dict): + # Check for any fields that might indicate tool usage + potential_tool_fields = { + k: v for k, v in chunk_data.items() + if any(keyword in str(k).lower() for keyword in ['tool', 'call', 'retrieval', 'function', 'result', 'output']) + } + if potential_tool_fields: + logger.info( + "Potential tool-related fields in chunk", + chunk_count=chunk_count, + fields=list(potential_tool_fields.keys()), + sample_data=str(potential_tool_fields)[:500] + ) + # Middleware: Detect implicit tool calls and inject standardized events + # This helps Granite 3.3 8b and other models that don't emit standard markers + if isinstance(chunk_data, dict) and not detected_tool_call: + # Check if this chunk contains retrieval results + has_results = any([ + 'results' in chunk_data and isinstance(chunk_data.get('results'), list), + 'outputs' in chunk_data and isinstance(chunk_data.get('outputs'), list), + 'retrieved_documents' in chunk_data, + 'retrieval_results' in chunk_data, + ]) + + if has_results: + logger.info( + "Detected implicit tool call in backend, injecting synthetic event", + chunk_fields=list(chunk_data.keys()) + ) + # Inject a synthetic tool call event before this chunk + synthetic_event = { + "type": "response.output_item.done", + "item": { + "type": "retrieval_call", + "id": f"synthetic_{chunk_count}", + "name": "Retrieval", + "tool_name": "Retrieval", + "status": "completed", + "inputs": {"implicit": True, "backend_detected": True}, + "results": chunk_data.get('results') or chunk_data.get('outputs') or + chunk_data.get('retrieved_documents') or + chunk_data.get('retrieval_results') or [] + } + } + # Send the synthetic event first + yield (json.dumps(synthetic_event, default=str) + "\n").encode("utf-8") + detected_tool_call = True # Mark that we've injected a tool call + yield (json.dumps(chunk_data, default=str) + "\n").encode("utf-8") except Exception as e: # Fallback to string representation