From 8c26e03114915705b2ae3ce0a8533a4728ba145d Mon Sep 17 00:00:00 2001
From: phact
Date: Tue, 13 Jan 2026 14:33:58 -0500
Subject: [PATCH 1/2] add usage data to chat backend

---
 src/agent.py       | 28 ++++++++++++++++++++++++++++
 src/api/v1/chat.py |  9 +++++++--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/src/agent.py b/src/agent.py
index 08ca99d4..76cf8b1d 100644
--- a/src/agent.py
+++ b/src/agent.py
@@ -197,6 +197,18 @@ async def async_response_stream(
                     sample_data=str(potential_tool_fields)[:500]
                 )
 
+            # Detect response.completed event and log usage
+            if isinstance(chunk_data, dict) and chunk_data.get("type") == "response.completed":
+                response_data = chunk_data.get("response", {})
+                usage = response_data.get("usage")
+                if usage:
+                    logger.info(
+                        "Stream usage data",
+                        input_tokens=usage.get("input_tokens"),
+                        output_tokens=usage.get("output_tokens"),
+                        total_tokens=usage.get("total_tokens"),
+                    )
+
             # Middleware: Detect implicit tool calls and inject standardized events
             # This helps Granite 3.3 8b and other models that don't emit standard markers
             if isinstance(chunk_data, dict) and not detected_tool_call:
@@ -487,6 +499,7 @@ async def async_chat_stream(
 
     full_response = ""
     response_id = None
+    usage_data = None
     async for chunk in async_stream(
         async_client,
         prompt,
@@ -506,6 +519,10 @@
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
         yield chunk
@@ -518,6 +535,9 @@
         "response_id": response_id,
         "timestamp": datetime.now(),
     }
+    # Store usage data if available (from response.completed event)
+    if usage_data:
+        assistant_message["response_data"] = {"usage": usage_data}
     conversation_state["messages"].append(assistant_message)
 
     # Store the conversation thread with its response_id
@@ -676,6 +696,7 @@ async def async_langflow_chat_stream(
 
     full_response = ""
     response_id = None
+    usage_data = None
     collected_chunks = []  # Store all chunks for function call data
 
     async for chunk in async_stream(
@@ -700,6 +721,10 @@
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
         yield chunk
@@ -713,6 +738,9 @@
         "timestamp": datetime.now(),
         "chunks": collected_chunks,  # Store complete chunk data for function calls
     }
+    # Store usage data if available (from response.completed event)
+    if usage_data:
+        assistant_message["response_data"] = {"usage": usage_data}
     conversation_state["messages"].append(assistant_message)
 
     # Store the conversation thread with its response_id
diff --git a/src/api/v1/chat.py b/src/api/v1/chat.py
index ecadba16..9fc83d67 100644
--- a/src/api/v1/chat.py
+++ b/src/api/v1/chat.py
@@ -239,11 +239,16 @@ async def chat_get_endpoint(request: Request, chat_service, session_manager):
     # Transform to public API format
     messages = []
     for msg in conversation.get("messages", []):
-        messages.append({
+        message_data = {
             "role": msg.get("role"),
             "content": msg.get("content"),
             "timestamp": msg.get("timestamp"),
-        })
+        }
+        # Include token usage if available (from Responses API)
+        usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None
+        if usage:
+            message_data["usage"] = usage
+        messages.append(message_data)
 
     response_data = {
         "chat_id": conversation.get("response_id"),
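
Note on the event shape: every handler in this patch keys off the
response.completed event that terminates an OpenAI Responses API stream.
A minimal sketch of such a chunk (TypeScript for illustration; the id and
token counts are invented):

    // Hypothetical final chunk of a Responses API stream. The handlers in
    // src/agent.py read usage off exactly this event type via
    // chunk_data["response"]["usage"].
    const completedChunk = {
      type: "response.completed",
      response: {
        id: "resp_example123", // illustrative response id
        usage: {
          input_tokens: 1532,
          output_tokens: 208,
          total_tokens: 1740,
          input_tokens_details: { cached_tokens: 1024 },
          output_tokens_details: { reasoning_tokens: 64 },
        },
      },
    };
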
From 38072b27f51d8599455644db11987fd8a2bb8bc4 Mon Sep 17 00:00:00 2001
From: phact
Date: Wed, 14 Jan 2026 11:20:55 -0500
Subject: [PATCH 2/2] usage frontend

---
 .../chat/_components/assistant-message.tsx     |  6 ++++-
 frontend/app/chat/_components/token-usage.tsx  | 27 +++++++++++++++++++
 frontend/app/chat/_types/types.ts              | 13 +++++++++
 frontend/app/chat/page.tsx                     | 13 +++++++++
 frontend/hooks/useChatStreaming.ts             |  7 +++++
 5 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 frontend/app/chat/_components/token-usage.tsx

diff --git a/frontend/app/chat/_components/assistant-message.tsx b/frontend/app/chat/_components/assistant-message.tsx
index 0a046af0..ae6cffbb 100644
--- a/frontend/app/chat/_components/assistant-message.tsx
+++ b/frontend/app/chat/_components/assistant-message.tsx
@@ -3,9 +3,10 @@ import { motion } from "motion/react";
 import DogIcon from "@/components/icons/dog-icon";
 import { MarkdownRenderer } from "@/components/markdown-renderer";
 import { cn } from "@/lib/utils";
-import type { FunctionCall } from "../_types/types";
+import type { FunctionCall, TokenUsage as TokenUsageType } from "../_types/types";
 import { FunctionCalls } from "./function-calls";
 import { Message } from "./message";
+import { TokenUsage } from "./token-usage";
 
 interface AssistantMessageProps {
   content: string;
@@ -21,6 +22,7 @@ interface AssistantMessageProps {
   animate?: boolean;
   delay?: number;
   isInitialGreeting?: boolean;
+  usage?: TokenUsageType;
 }
 
 export function AssistantMessage({
@@ -37,6 +39,7 @@ export function AssistantMessage({
   animate = true,
   delay = 0.2,
   isInitialGreeting = false,
+  usage,
 }: AssistantMessageProps) {
   return (
+      {usage && !isStreaming && <TokenUsage usage={usage} />}
diff --git a/frontend/app/chat/_components/token-usage.tsx b/frontend/app/chat/_components/token-usage.tsx
new file mode 100644
index 00000000..2fc5f03a
--- /dev/null
+++ b/frontend/app/chat/_components/token-usage.tsx
@@ -0,0 +1,27 @@
+import { Zap } from "lucide-react";
+import type { TokenUsage as TokenUsageType } from "../_types/types";
+
+interface TokenUsageProps {
+  usage: TokenUsageType;
+}
+
+export function TokenUsage({ usage }: TokenUsageProps) {
+  // Guard against partial/malformed usage data
+  if (typeof usage.input_tokens !== "number" || typeof usage.output_tokens !== "number") {
+    return null;
+  }
+
+  return (
+    <div>
+      <Zap />
+      <span>
+        {usage.input_tokens.toLocaleString()} in / {usage.output_tokens.toLocaleString()} out
+        {usage.input_tokens_details?.cached_tokens ? (
+          <span>
+            ({usage.input_tokens_details.cached_tokens.toLocaleString()} cached)
+          </span>
+        ) : null}
+      </span>
+    </div>
+  );
+}
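
For reference, a sketch of how the new component is consumed (assuming the
"@/app/chat/_components/token-usage" path alias resolves as elsewhere in this
repo; the numbers are invented):

    import { TokenUsage } from "@/app/chat/_components/token-usage";

    // With this data the footer reads "1,532 in / 208 out (1,024 cached)";
    // the cached suffix is omitted when cached_tokens is absent or zero.
    export function ExampleFooter() {
      return (
        <TokenUsage
          usage={{
            input_tokens: 1532,
            output_tokens: 208,
            total_tokens: 1740,
            input_tokens_details: { cached_tokens: 1024 },
          }}
        />
      );
    }
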
diff --git a/frontend/app/chat/_types/types.ts b/frontend/app/chat/_types/types.ts
index c0d732ea..c605da7c 100644
--- a/frontend/app/chat/_types/types.ts
+++ b/frontend/app/chat/_types/types.ts
@@ -1,3 +1,15 @@
+export interface TokenUsage {
+  input_tokens: number;
+  output_tokens: number;
+  total_tokens: number;
+  input_tokens_details?: {
+    cached_tokens?: number;
+  };
+  output_tokens_details?: {
+    reasoning_tokens?: number;
+  };
+}
+
 export interface Message {
   role: "user" | "assistant";
   content: string;
@@ -5,6 +17,7 @@ export interface Message {
   functionCalls?: FunctionCall[];
   isStreaming?: boolean;
   source?: "langflow" | "chat";
+  usage?: TokenUsage;
 }
 
 export interface FunctionCall {
diff --git a/frontend/app/chat/page.tsx b/frontend/app/chat/page.tsx
index f15cf788..4d2ddfef 100644
--- a/frontend/app/chat/page.tsx
+++ b/frontend/app/chat/page.tsx
@@ -501,6 +501,17 @@ function ChatPage() {
           } else {
             console.log("No function calls found in message");
           }
+
+          // Extract usage data from response_data (object or JSON string)
+          if (msg.response_data) {
+            const responseData =
+              typeof msg.response_data === "string"
+                ? JSON.parse(msg.response_data)
+                : msg.response_data;
+            if (responseData.usage) {
+              message.usage = responseData.usage;
+            }
+          }
         }
 
         return message;
@@ -849,6 +860,7 @@ function ChatPage() {
         role: "assistant",
         content: result.response,
         timestamp: new Date(),
+        usage: result.usage,
       };
       setMessages((prev) => [...prev, assistantMessage]);
       if (result.response_id) {
@@ -1164,6 +1176,7 @@ function ChatPage() {
                     messages.length === 1 &&
                     message.content === "How can I assist?"
                   }
+                  usage={message.usage}
                 />
               ),
diff --git a/frontend/hooks/useChatStreaming.ts b/frontend/hooks/useChatStreaming.ts
index 89d0d810..6d19ac08 100644
--- a/frontend/hooks/useChatStreaming.ts
+++ b/frontend/hooks/useChatStreaming.ts
@@ -3,6 +3,7 @@ import type {
   FunctionCall,
   Message,
   SelectedFilters,
+  TokenUsage,
 } from "@/app/chat/_types/types";
 import { useChat } from "@/contexts/chat-context";
 
@@ -130,6 +131,7 @@ export function useChatStreaming({
   let currentContent = "";
   const currentFunctionCalls: FunctionCall[] = [];
   let newResponseId: string | null = null;
+  let usageData: TokenUsage | undefined;
 
   // Initialize streaming message
   if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
@@ -448,6 +450,10 @@
           else if (chunk.type === "response.output_text.delta") {
             currentContent += chunk.delta || "";
           }
+          // Handle response.completed event - capture usage
+          else if (chunk.type === "response.completed" && chunk.response?.usage) {
+            usageData = chunk.response.usage;
+          }
           // Handle OpenRAG backend format
           else if (chunk.output_text) {
             currentContent += chunk.output_text;
@@ -567,6 +573,7 @@
           currentFunctionCalls.length > 0 ? currentFunctionCalls : undefined,
         timestamp: new Date(),
         isStreaming: false,
+        usage: usageData,
       };
 
       if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
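
End-to-end, a message fetched back through the chat GET endpoint now carries
an optional usage block alongside role, content, and timestamp. A sketch of
one such entry (field names follow the transform in src/api/v1/chat.py;
content, timestamp, and counts are invented):

    // Hypothetical entry from the transformed messages array; usage is
    // present only when the backend captured a response.completed event.
    const exampleMessage = {
      role: "assistant",
      content: "Here is the summary you asked for.",
      timestamp: "2026-01-14T11:20:55-05:00",
      usage: {
        input_tokens: 1532,
        output_tokens: 208,
        total_tokens: 1740,
      },
    };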