Compare commits


3 commits

Author       SHA1        Message                              Date
Edwin Jose   08f675c70a  Merge branch 'main' into usage-data  2026-01-16 09:10:39 -05:00
phact        38072b27f5  usage frontend                       2026-01-14 11:20:55 -05:00
phact        8c26e03114  add usage data to chat backend       2026-01-13 14:33:58 -05:00
7 changed files with 100 additions and 3 deletions


@@ -3,9 +3,10 @@ import { motion } from "motion/react";
 import DogIcon from "@/components/icons/dog-icon";
 import { MarkdownRenderer } from "@/components/markdown-renderer";
 import { cn } from "@/lib/utils";
-import type { FunctionCall } from "../_types/types";
+import type { FunctionCall, TokenUsage as TokenUsageType } from "../_types/types";
 import { FunctionCalls } from "./function-calls";
 import { Message } from "./message";
+import { TokenUsage } from "./token-usage";
 
 interface AssistantMessageProps {
   content: string;
@@ -21,6 +22,7 @@ interface AssistantMessageProps {
   animate?: boolean;
   delay?: number;
   isInitialGreeting?: boolean;
+  usage?: TokenUsageType;
 }
 
 export function AssistantMessage({
@@ -37,6 +39,7 @@ export function AssistantMessage({
   animate = true,
   delay = 0.2,
   isInitialGreeting = false,
+  usage,
 }: AssistantMessageProps) {
   return (
     <motion.div
@@ -135,6 +138,7 @@ export function AssistantMessage({
                 : content
             }
           />
+          {usage && !isStreaming && <TokenUsage usage={usage} />}
         </motion.div>
       </div>
     </Message>
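Worth noting in the hunk above: the render guard usage && !isStreaming keeps the token badge hidden while a response is still streaming, since usage only arrives with the final response.completed event (captured in the streaming-hook changes further down).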


@@ -0,0 +1,27 @@
+import { Zap } from "lucide-react";
+import type { TokenUsage as TokenUsageType } from "../_types/types";
+
+interface TokenUsageProps {
+  usage: TokenUsageType;
+}
+
+export function TokenUsage({ usage }: TokenUsageProps) {
+  // Guard against partial/malformed usage data
+  if (typeof usage.input_tokens !== "number" || typeof usage.output_tokens !== "number") {
+    return null;
+  }
+
+  return (
+    <div className="flex items-center gap-2 mt-2 text-xs text-muted-foreground">
+      <Zap className="h-3 w-3" />
+      <span>
+        {usage.input_tokens.toLocaleString()} in / {usage.output_tokens.toLocaleString()} out
+        {usage.input_tokens_details?.cached_tokens ? (
+          <span className="text-green-500 ml-1">
+            ({usage.input_tokens_details.cached_tokens.toLocaleString()} cached)
+          </span>
+        ) : null}
+      </span>
+    </div>
+  );
+}
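A quick sketch of what the new component produces; the numbers are illustrative, not taken from the diff:

// Sketch: any object matching the TokenUsage type will do (values illustrative).
const usage = {
  input_tokens: 5321,
  output_tokens: 412,
  total_tokens: 5733,
  input_tokens_details: { cached_tokens: 4096 },
};

// <TokenUsage usage={usage} /> then renders, roughly:
//   (zap icon) 5,321 in / 412 out (4,096 cached)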


@@ -1,3 +1,15 @@
+export interface TokenUsage {
+  input_tokens: number;
+  output_tokens: number;
+  total_tokens: number;
+  input_tokens_details?: {
+    cached_tokens?: number;
+  };
+  output_tokens_details?: {
+    reasoning_tokens?: number;
+  };
+}
+
 export interface Message {
   role: "user" | "assistant";
   content: string;
@@ -5,6 +17,7 @@ export interface Message {
   functionCalls?: FunctionCall[];
   isStreaming?: boolean;
   source?: "langflow" | "chat";
+  usage?: TokenUsage;
 }
 
 export interface FunctionCall {


@@ -501,6 +501,17 @@ function ChatPage() {
       } else {
         console.log("No function calls found in message");
       }
+
+      // Extract usage data from response_data (may be a parsed object or a JSON string)
+      if (msg.response_data) {
+        const responseData =
+          typeof msg.response_data === "string"
+            ? JSON.parse(msg.response_data)
+            : msg.response_data;
+        if (responseData.usage) {
+          message.usage = responseData.usage;
+        }
+      }
     }
 
     return message;
@@ -849,6 +860,7 @@ function ChatPage() {
        role: "assistant",
        content: result.response,
        timestamp: new Date(),
+       usage: result.usage,
      };
      setMessages((prev) => [...prev, assistantMessage]);
      if (result.response_id) {
@@ -1164,6 +1176,7 @@ function ChatPage() {
                      messages.length === 1 &&
                      message.content === "How can I assist?"
                    }
+                    usage={message.usage}
                  />
                </div>
              ),
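For reference, the extraction in the first hunk expects a stored message shaped roughly like this; response_data may arrive already parsed or as a JSON string, hence the branch (values illustrative):

// Hypothetical stored message, shape inferred from the backend changes below.
const msg = {
  role: "assistant",
  content: "...",
  response_data: {
    usage: { input_tokens: 5321, output_tokens: 412, total_tokens: 5733 },
  },
};
// message.usage becomes responseData.usage, i.e. the object above.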


@@ -3,6 +3,7 @@ import type {
   FunctionCall,
   Message,
   SelectedFilters,
+  TokenUsage,
 } from "@/app/chat/_types/types";
 import { useChat } from "@/contexts/chat-context";
@@ -130,6 +131,7 @@ export function useChatStreaming({
     let currentContent = "";
     const currentFunctionCalls: FunctionCall[] = [];
     let newResponseId: string | null = null;
+    let usageData: TokenUsage | undefined;
 
     // Initialize streaming message
     if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
@@ -448,6 +450,10 @@ export function useChatStreaming({
           else if (chunk.type === "response.output_text.delta") {
             currentContent += chunk.delta || "";
           }
+          // Handle response.completed event - capture usage
+          else if (chunk.type === "response.completed" && chunk.response?.usage) {
+            usageData = chunk.response.usage;
+          }
           // Handle OpenRAG backend format
           else if (chunk.output_text) {
             currentContent += chunk.output_text;
@@ -567,6 +573,7 @@ export function useChatStreaming({
           currentFunctionCalls.length > 0 ? currentFunctionCalls : undefined,
         timestamp: new Date(),
         isStreaming: false,
+        usage: usageData,
       };
 
       if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
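The new response.completed branch matches a terminal chunk of roughly this shape; a sketch with illustrative values, field names following the usage object already defined in the types diff above:

// Hypothetical final stream chunk as the handler sees it.
const chunk = {
  type: "response.completed",
  response: {
    usage: {
      input_tokens: 5321,
      output_tokens: 412,
      total_tokens: 5733,
      input_tokens_details: { cached_tokens: 4096 },
      output_tokens_details: { reasoning_tokens: 128 },
    },
  },
};
// usageData = chunk.response.usage, later attached to the finished message.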


@@ -197,6 +197,18 @@ async def async_response_stream(
                     sample_data=str(potential_tool_fields)[:500]
                 )
 
+            # Detect response.completed event and log usage
+            if isinstance(chunk_data, dict) and chunk_data.get("type") == "response.completed":
+                response_data = chunk_data.get("response", {})
+                usage = response_data.get("usage")
+                if usage:
+                    logger.info(
+                        "Stream usage data",
+                        input_tokens=usage.get("input_tokens"),
+                        output_tokens=usage.get("output_tokens"),
+                        total_tokens=usage.get("total_tokens"),
+                    )
+
             # Middleware: Detect implicit tool calls and inject standardized events
             # This helps Granite 3.3 8b and other models that don't emit standard markers
             if isinstance(chunk_data, dict) and not detected_tool_call:
@@ -487,6 +499,7 @@ async def async_chat_stream(
     full_response = ""
     response_id = None
+    usage_data = None
 
     async for chunk in async_stream(
         async_client,
         prompt,
@@ -506,6 +519,10 @@ async def async_chat_stream(
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
         yield chunk
@@ -518,6 +535,9 @@ async def async_chat_stream(
         "response_id": response_id,
         "timestamp": datetime.now(),
     }
+    # Store usage data if available (from response.completed event)
+    if usage_data:
+        assistant_message["response_data"] = {"usage": usage_data}
     conversation_state["messages"].append(assistant_message)
 
     # Store the conversation thread with its response_id
@@ -676,6 +696,7 @@ async def async_langflow_chat_stream(
     full_response = ""
     response_id = None
+    usage_data = None
     collected_chunks = []  # Store all chunks for function call data
 
     async for chunk in async_stream(
@@ -700,6 +721,10 @@ async def async_langflow_chat_stream(
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
         yield chunk
@@ -713,6 +738,9 @@ async def async_langflow_chat_stream(
         "timestamp": datetime.now(),
         "chunks": collected_chunks,  # Store complete chunk data for function calls
     }
+    # Store usage data if available (from response.completed event)
+    if usage_data:
+        assistant_message["response_data"] = {"usage": usage_data}
     conversation_state["messages"].append(assistant_message)
 
     # Store the conversation thread with its response_id


@@ -239,11 +239,16 @@ async def chat_get_endpoint(request: Request, chat_service, session_manager):
     # Transform to public API format
     messages = []
     for msg in conversation.get("messages", []):
-        messages.append({
+        message_data = {
             "role": msg.get("role"),
             "content": msg.get("content"),
             "timestamp": msg.get("timestamp"),
-        })
+        }
+        # Include token usage if available (from Responses API)
+        usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None
+        if usage:
+            message_data["usage"] = usage
+        messages.append(message_data)
 
     response_data = {
         "chat_id": conversation.get("response_id"),