Compare commits
main...usage-data (3 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 08f675c70a |  |
|  | 38072b27f5 |  |
|  | 8c26e03114 |  |
7 changed files with 100 additions and 3 deletions
```diff
@@ -3,9 +3,10 @@ import { motion } from "motion/react";
 import DogIcon from "@/components/icons/dog-icon";
 import { MarkdownRenderer } from "@/components/markdown-renderer";
 import { cn } from "@/lib/utils";
-import type { FunctionCall } from "../_types/types";
+import type { FunctionCall, TokenUsage as TokenUsageType } from "../_types/types";
 import { FunctionCalls } from "./function-calls";
 import { Message } from "./message";
+import { TokenUsage } from "./token-usage";

 interface AssistantMessageProps {
   content: string;
@@ -21,6 +22,7 @@ interface AssistantMessageProps {
   animate?: boolean;
   delay?: number;
   isInitialGreeting?: boolean;
+  usage?: TokenUsageType;
 }

 export function AssistantMessage({
@@ -37,6 +39,7 @@ export function AssistantMessage({
   animate = true,
   delay = 0.2,
   isInitialGreeting = false,
+  usage,
 }: AssistantMessageProps) {
   return (
     <motion.div
@@ -135,6 +138,7 @@ export function AssistantMessage({
               : content
           }
         />
+        {usage && !isStreaming && <TokenUsage usage={usage} />}
       </motion.div>
     </div>
   </Message>
```
frontend/app/chat/_components/token-usage.tsx (new file, 27 additions)

```diff
@@ -0,0 +1,27 @@
+import { Zap } from "lucide-react";
+import type { TokenUsage as TokenUsageType } from "../_types/types";
+
+interface TokenUsageProps {
+  usage: TokenUsageType;
+}
+
+export function TokenUsage({ usage }: TokenUsageProps) {
+  // Guard against partial/malformed usage data
+  if (typeof usage.input_tokens !== "number" || typeof usage.output_tokens !== "number") {
+    return null;
+  }
+
+  return (
+    <div className="flex items-center gap-2 mt-2 text-xs text-muted-foreground">
+      <Zap className="h-3 w-3" />
+      <span>
+        {usage.input_tokens.toLocaleString()} in / {usage.output_tokens.toLocaleString()} out
+        {usage.input_tokens_details?.cached_tokens ? (
+          <span className="text-green-500 ml-1">
+            ({usage.input_tokens_details.cached_tokens.toLocaleString()} cached)
+          </span>
+        ) : null}
+      </span>
+    </div>
+  );
+}
```
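For reference, a minimal sketch of how the new component might be consumed. The sample `usage` values and the `Example` wrapper are illustrative assumptions, not part of the diff:

```tsx
import { TokenUsage } from "./token-usage";

// Hypothetical usage object; field names follow the TokenUsage interface added in this branch.
const usage = {
  input_tokens: 1542,
  output_tokens: 318,
  total_tokens: 1860,
  input_tokens_details: { cached_tokens: 1024 },
};

// Renders roughly "1,542 in / 318 out (1,024 cached)" next to the Zap icon.
export function Example() {
  return <TokenUsage usage={usage} />;
}
```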
```diff
@@ -1,3 +1,15 @@
+export interface TokenUsage {
+  input_tokens: number;
+  output_tokens: number;
+  total_tokens: number;
+  input_tokens_details?: {
+    cached_tokens?: number;
+  };
+  output_tokens_details?: {
+    reasoning_tokens?: number;
+  };
+}
+
 export interface Message {
   role: "user" | "assistant";
   content: string;
@@ -5,6 +17,7 @@ export interface Message {
   functionCalls?: FunctionCall[];
   isStreaming?: boolean;
   source?: "langflow" | "chat";
+  usage?: TokenUsage;
 }

 export interface FunctionCall {
```
```diff
@@ -501,6 +501,17 @@ function ChatPage() {
       } else {
         console.log("No function calls found in message");
       }
+
+      // Extract usage data from response_data
+      if (msg.response_data && typeof msg.response_data === "object") {
+        const responseData =
+          typeof msg.response_data === "string"
+            ? JSON.parse(msg.response_data)
+            : msg.response_data;
+        if (responseData.usage) {
+          message.usage = responseData.usage;
+        }
+      }
     }

     return message;
@@ -849,6 +860,7 @@ function ChatPage() {
       role: "assistant",
       content: result.response,
       timestamp: new Date(),
+      usage: result.usage,
     };
     setMessages((prev) => [...prev, assistantMessage]);
     if (result.response_id) {
@@ -1164,6 +1176,7 @@ function ChatPage() {
                 messages.length === 1 &&
                 message.content === "How can I assist?"
               }
+              usage={message.usage}
             />
           </div>
         ),
```
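For context, a sketch of the stored message shape the first hunk reads `usage` from. The values and the message content are illustrative; field names follow the code above:

```ts
// Illustrative stored message; the extraction above copies response_data.usage onto message.usage.
const msg = {
  role: "assistant",
  content: "Here is the summary you asked for.",
  response_data: {
    usage: { input_tokens: 900, output_tokens: 150, total_tokens: 1050 },
  },
};
```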
```diff
@@ -3,6 +3,7 @@ import type {
   FunctionCall,
   Message,
   SelectedFilters,
+  TokenUsage,
 } from "@/app/chat/_types/types";
 import { useChat } from "@/contexts/chat-context";

@@ -130,6 +131,7 @@ export function useChatStreaming({
     let currentContent = "";
     const currentFunctionCalls: FunctionCall[] = [];
     let newResponseId: string | null = null;
+    let usageData: TokenUsage | undefined;

     // Initialize streaming message
     if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
@@ -448,6 +450,10 @@
       else if (chunk.type === "response.output_text.delta") {
         currentContent += chunk.delta || "";
       }
+      // Handle response.completed event - capture usage
+      else if (chunk.type === "response.completed" && chunk.response?.usage) {
+        usageData = chunk.response.usage;
+      }
       // Handle OpenRAG backend format
       else if (chunk.output_text) {
         currentContent += chunk.output_text;
@@ -567,6 +573,7 @@
         currentFunctionCalls.length > 0 ? currentFunctionCalls : undefined,
       timestamp: new Date(),
       isStreaming: false,
+      usage: usageData,
     };

     if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
```
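The new `response.completed` branch assumes a final streaming chunk shaped like the Responses API event referenced elsewhere in this diff. A minimal sketch, with illustrative values; the hook only reads `chunk.type` and `chunk.response.usage`:

```ts
// Hypothetical final streaming chunk.
const completedChunk = {
  type: "response.completed",
  response: {
    id: "resp_abc123",
    usage: {
      input_tokens: 1542,
      output_tokens: 318,
      total_tokens: 1860,
      input_tokens_details: { cached_tokens: 1024 },
    },
  },
};
```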
src/agent.py (28 additions)

```diff
@@ -197,6 +197,18 @@ async def async_response_stream(
                     sample_data=str(potential_tool_fields)[:500]
                 )

+            # Detect response.completed event and log usage
+            if isinstance(chunk_data, dict) and chunk_data.get("type") == "response.completed":
+                response_data = chunk_data.get("response", {})
+                usage = response_data.get("usage")
+                if usage:
+                    logger.info(
+                        "Stream usage data",
+                        input_tokens=usage.get("input_tokens"),
+                        output_tokens=usage.get("output_tokens"),
+                        total_tokens=usage.get("total_tokens"),
+                    )
+
             # Middleware: Detect implicit tool calls and inject standardized events
             # This helps Granite 3.3 8b and other models that don't emit standard markers
             if isinstance(chunk_data, dict) and not detected_tool_call:
@@ -487,6 +499,7 @@ async def async_chat_stream(

     full_response = ""
     response_id = None
+    usage_data = None
     async for chunk in async_stream(
         async_client,
         prompt,
@@ -506,6 +519,10 @@
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
         yield chunk
@@ -518,6 +535,9 @@
         "response_id": response_id,
         "timestamp": datetime.now(),
     }
+    # Store usage data if available (from response.completed event)
+    if usage_data:
+        assistant_message["response_data"] = {"usage": usage_data}
     conversation_state["messages"].append(assistant_message)

     # Store the conversation thread with its response_id
@@ -676,6 +696,7 @@ async def async_langflow_chat_stream(

     full_response = ""
     response_id = None
+    usage_data = None
     collected_chunks = []  # Store all chunks for function call data

     async for chunk in async_stream(
@@ -700,6 +721,10 @@
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
         yield chunk
@@ -713,6 +738,9 @@
         "timestamp": datetime.now(),
         "chunks": collected_chunks,  # Store complete chunk data for function calls
     }
+    # Store usage data if available (from response.completed event)
+    if usage_data:
+        assistant_message["response_data"] = {"usage": usage_data}
     conversation_state["messages"].append(assistant_message)

     # Store the conversation thread with its response_id
```
```diff
@@ -239,11 +239,16 @@ async def chat_get_endpoint(request: Request, chat_service, session_manager):
     # Transform to public API format
     messages = []
     for msg in conversation.get("messages", []):
-        messages.append({
+        message_data = {
             "role": msg.get("role"),
             "content": msg.get("content"),
             "timestamp": msg.get("timestamp"),
-        })
+        }
+        # Include token usage if available (from Responses API)
+        usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None
+        if usage:
+            message_data["usage"] = usage
+        messages.append(message_data)

     response_data = {
         "chat_id": conversation.get("response_id"),
```
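Assuming the surrounding handler returns `response_data` as JSON, a transformed message in the public payload would now look roughly like this. Values and the timestamp format are illustrative; `usage` appears only when the stored message carried `response_data.usage`:

```ts
// Hypothetical entry from the transformed messages list.
const publicMessage = {
  role: "assistant",
  content: "Here is the summary you asked for.",
  timestamp: "2024-01-01T12:00:00Z",
  usage: {
    input_tokens: 1200,
    output_tokens: 250,
    total_tokens: 1450,
  },
};
```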