Compare commits

3 commits: main...usage-data

| Author | SHA1 | Date |
|---|---|---|
| | 08f675c70a | |
| | 38072b27f5 | |
| | 8c26e03114 | |

7 changed files with 100 additions and 3 deletions
**AssistantMessage component** (file path not captured; a sibling of token-usage.tsx in frontend/app/chat/_components/, judging by the `./token-usage` import)

```diff
@@ -3,9 +3,10 @@ import { motion } from "motion/react";
 import DogIcon from "@/components/icons/dog-icon";
 import { MarkdownRenderer } from "@/components/markdown-renderer";
 import { cn } from "@/lib/utils";
-import type { FunctionCall } from "../_types/types";
+import type { FunctionCall, TokenUsage as TokenUsageType } from "../_types/types";
 import { FunctionCalls } from "./function-calls";
 import { Message } from "./message";
+import { TokenUsage } from "./token-usage";
 
 interface AssistantMessageProps {
   content: string;
@@ -21,6 +22,7 @@ interface AssistantMessageProps {
   animate?: boolean;
   delay?: number;
   isInitialGreeting?: boolean;
+  usage?: TokenUsageType;
 }
 
 export function AssistantMessage({
@@ -37,6 +39,7 @@ export function AssistantMessage({
   animate = true,
   delay = 0.2,
   isInitialGreeting = false,
+  usage,
 }: AssistantMessageProps) {
   return (
     <motion.div
@@ -135,6 +138,7 @@ export function AssistantMessage({
             : content
         }
       />
+      {usage && !isStreaming && <TokenUsage usage={usage} />}
     </motion.div>
   </div>
 </Message>
```
**frontend/app/chat/_components/token-usage.tsx** (new file, 27 lines)

```diff
@@ -0,0 +1,27 @@
+import { Zap } from "lucide-react";
+import type { TokenUsage as TokenUsageType } from "../_types/types";
+
+interface TokenUsageProps {
+  usage: TokenUsageType;
+}
+
+export function TokenUsage({ usage }: TokenUsageProps) {
+  // Guard against partial/malformed usage data
+  if (typeof usage.input_tokens !== "number" || typeof usage.output_tokens !== "number") {
+    return null;
+  }
+
+  return (
+    <div className="flex items-center gap-2 mt-2 text-xs text-muted-foreground">
+      <Zap className="h-3 w-3" />
+      <span>
+        {usage.input_tokens.toLocaleString()} in / {usage.output_tokens.toLocaleString()} out
+        {usage.input_tokens_details?.cached_tokens ? (
+          <span className="text-green-500 ml-1">
+            ({usage.input_tokens_details.cached_tokens.toLocaleString()} cached)
+          </span>
+        ) : null}
+      </span>
+    </div>
+  );
+}
```
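For orientation, a minimal sketch of the component in use. The demo component and the sample numbers are invented; `TokenUsage` and its guard come from the file above.

```tsx
import { TokenUsage } from "@/app/chat/_components/token-usage";

// Hypothetical demo; renders a Zap icon followed by
// "1,523 in / 348 out (1,024 cached)". If input_tokens or
// output_tokens were missing or non-numeric, the guard above
// would make TokenUsage render nothing instead.
export function TokenUsageDemo() {
  return (
    <TokenUsage
      usage={{
        input_tokens: 1523,
        output_tokens: 348,
        total_tokens: 1871,
        input_tokens_details: { cached_tokens: 1024 },
      }}
    />
  );
}
```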
**chat types module** (file path not captured; imports elsewhere resolve it as frontend/app/chat/_types/types.ts)

```diff
@@ -1,3 +1,15 @@
+export interface TokenUsage {
+  input_tokens: number;
+  output_tokens: number;
+  total_tokens: number;
+  input_tokens_details?: {
+    cached_tokens?: number;
+  };
+  output_tokens_details?: {
+    reasoning_tokens?: number;
+  };
+}
+
 export interface Message {
   role: "user" | "assistant";
   content: string;
@@ -5,6 +17,7 @@ export interface Message {
   functionCalls?: FunctionCall[];
   isStreaming?: boolean;
   source?: "langflow" | "chat";
+  usage?: TokenUsage;
 }
 
 export interface FunctionCall {
```
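The interface mirrors the `usage` object the OpenAI Responses API reports on a completed response. For reference, a value exercising every field (numbers invented):

```ts
import type { TokenUsage } from "@/app/chat/_types/types";

// Hypothetical payload; only input_tokens, output_tokens, and
// total_tokens are required, and both *_details objects are optional.
const sampleUsage: TokenUsage = {
  input_tokens: 980,
  output_tokens: 212,
  total_tokens: 1192,
  input_tokens_details: { cached_tokens: 512 },
  output_tokens_details: { reasoning_tokens: 64 },
};
```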
**ChatPage component** (file path not captured)

```diff
@@ -501,6 +501,17 @@ function ChatPage() {
         } else {
           console.log("No function calls found in message");
         }
+
+        // Extract usage data from response_data
+        if (msg.response_data && typeof msg.response_data === "object") {
+          const responseData =
+            typeof msg.response_data === "string"
+              ? JSON.parse(msg.response_data)
+              : msg.response_data;
+          if (responseData.usage) {
+            message.usage = responseData.usage;
+          }
+        }
       }
 
       return message;
@@ -849,6 +860,7 @@ function ChatPage() {
         role: "assistant",
         content: result.response,
         timestamp: new Date(),
+        usage: result.usage,
       };
       setMessages((prev) => [...prev, assistantMessage]);
       if (result.response_id) {
@@ -1164,6 +1176,7 @@ function ChatPage() {
         messages.length === 1 &&
         message.content === "How can I assist?"
       }
+      usage={message.usage}
     />
   </div>
 ),
```
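A caveat on the first hunk: because the outer check requires `typeof msg.response_data === "object"`, the `typeof ... === "string"` branch inside the conditional can never be taken, so a JSON-string `response_data` is silently skipped. A minimal sketch of a variant that handles both forms (hypothetical helper, not part of the diff; `JSON.parse` wrapped in try/catch since the payload may not be valid JSON):

```ts
import type { TokenUsage } from "@/app/chat/_types/types";

// Hypothetical helper: accepts response_data as either an object or a
// serialized JSON string and returns its usage block, if any.
function extractUsage(responseData: unknown): TokenUsage | undefined {
  if (typeof responseData === "string") {
    try {
      responseData = JSON.parse(responseData);
    } catch {
      return undefined; // not valid JSON, nothing to extract
    }
  }
  if (responseData && typeof responseData === "object" && "usage" in responseData) {
    return (responseData as { usage?: TokenUsage }).usage;
  }
  return undefined;
}
```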
**useChatStreaming hook** (file path not captured)

```diff
@@ -3,6 +3,7 @@ import type {
   FunctionCall,
   Message,
   SelectedFilters,
+  TokenUsage,
 } from "@/app/chat/_types/types";
 import { useChat } from "@/contexts/chat-context";
 
@@ -130,6 +131,7 @@ export function useChatStreaming({
     let currentContent = "";
     const currentFunctionCalls: FunctionCall[] = [];
     let newResponseId: string | null = null;
+    let usageData: TokenUsage | undefined;
 
     // Initialize streaming message
     if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
@@ -448,6 +450,10 @@ export function useChatStreaming({
       else if (chunk.type === "response.output_text.delta") {
         currentContent += chunk.delta || "";
       }
+      // Handle response.completed event - capture usage
+      else if (chunk.type === "response.completed" && chunk.response?.usage) {
+        usageData = chunk.response.usage;
+      }
       // Handle OpenRAG backend format
       else if (chunk.output_text) {
         currentContent += chunk.output_text;
@@ -567,6 +573,7 @@ export function useChatStreaming({
         currentFunctionCalls.length > 0 ? currentFunctionCalls : undefined,
       timestamp: new Date(),
       isStreaming: false,
+      usage: usageData,
     };
 
     if (!controller.signal.aborted && thisStreamId === streamIdRef.current) {
```
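The new branch slots between the delta handler and the OpenRAG fallback, so `usageData` is set only when a `response.completed` event carrying usage arrives. A hedged sketch of the chunk union the handler assumes (field names taken from the diff; the type itself is illustrative and does not exist in the codebase):

```ts
import type { TokenUsage } from "@/app/chat/_types/types";

// Illustrative only; real chunks carry more fields.
type StreamChunk = {
  type?: string;                      // e.g. "response.output_text.delta",
                                      // "response.completed"
  delta?: string;                     // text increment on delta events
  response?: { usage?: TokenUsage };  // present on response.completed
  output_text?: string;               // OpenRAG backend format
};
```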
**src/agent.py** (28 additions)

```diff
@@ -197,6 +197,18 @@ async def async_response_stream(
                 sample_data=str(potential_tool_fields)[:500]
             )
+
+        # Detect response.completed event and log usage
+        if isinstance(chunk_data, dict) and chunk_data.get("type") == "response.completed":
+            response_data = chunk_data.get("response", {})
+            usage = response_data.get("usage")
+            if usage:
+                logger.info(
+                    "Stream usage data",
+                    input_tokens=usage.get("input_tokens"),
+                    output_tokens=usage.get("output_tokens"),
+                    total_tokens=usage.get("total_tokens"),
+                )
 
         # Middleware: Detect implicit tool calls and inject standardized events
         # This helps Granite 3.3 8b and other models that don't emit standard markers
         if isinstance(chunk_data, dict) and not detected_tool_call:
@@ -487,6 +499,7 @@ async def async_chat_stream(
 
     full_response = ""
     response_id = None
+    usage_data = None
     async for chunk in async_stream(
         async_client,
         prompt,
@@ -506,6 +519,10 @@ async def async_chat_stream(
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
             yield chunk
@@ -518,6 +535,9 @@ async def async_chat_stream(
             "response_id": response_id,
             "timestamp": datetime.now(),
         }
+        # Store usage data if available (from response.completed event)
+        if usage_data:
+            assistant_message["response_data"] = {"usage": usage_data}
         conversation_state["messages"].append(assistant_message)
 
         # Store the conversation thread with its response_id
@@ -676,6 +696,7 @@ async def async_langflow_chat_stream(
 
     full_response = ""
     response_id = None
+    usage_data = None
     collected_chunks = []  # Store all chunks for function call data
 
     async for chunk in async_stream(
@@ -700,6 +721,10 @@ async def async_langflow_chat_stream(
                     response_id = chunk_data["id"]
                 elif "response_id" in chunk_data:
                     response_id = chunk_data["response_id"]
+                # Capture usage from response.completed event
+                if chunk_data.get("type") == "response.completed":
+                    response_obj = chunk_data.get("response", {})
+                    usage_data = response_obj.get("usage")
             except:
                 pass
             yield chunk
@@ -713,6 +738,9 @@ async def async_langflow_chat_stream(
             "timestamp": datetime.now(),
             "chunks": collected_chunks,  # Store complete chunk data for function calls
         }
+        # Store usage data if available (from response.completed event)
+        if usage_data:
+            assistant_message["response_data"] = {"usage": usage_data}
         conversation_state["messages"].append(assistant_message)
 
         # Store the conversation thread with its response_id
```
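All three Python capture sites key off the same event shape. A hedged sketch of the `response.completed` chunk being matched, kept in TypeScript for consistency with the samples above (abbreviated, values invented; modeled on the OpenAI Responses API streaming event):

```ts
// Illustrative payload; the backend's chunk_data parses to this shape.
const completedChunk = {
  type: "response.completed",
  response: {
    id: "resp_abc123", // hypothetical id
    usage: {
      input_tokens: 980,
      output_tokens: 212,
      total_tokens: 1192,
      input_tokens_details: { cached_tokens: 512 },
    },
  },
};
```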
**chat GET endpoint** (file path not captured; `chat_get_endpoint`)

```diff
@@ -239,11 +239,16 @@ async def chat_get_endpoint(request: Request, chat_service, session_manager):
     # Transform to public API format
     messages = []
     for msg in conversation.get("messages", []):
-        messages.append({
+        message_data = {
             "role": msg.get("role"),
             "content": msg.get("content"),
             "timestamp": msg.get("timestamp"),
-        })
+        }
+        # Include token usage if available (from Responses API)
+        usage = msg.get("response_data", {}).get("usage") if isinstance(msg.get("response_data"), dict) else None
+        if usage:
+            message_data["usage"] = usage
+        messages.append(message_data)
 
     response_data = {
         "chat_id": conversation.get("response_id"),
```
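After this change, each entry in the public GET response can carry `usage` alongside the existing fields. An illustrative entry (values invented; the timestamp format depends on how `datetime` values are serialized):

```ts
// Hypothetical transformed message entry.
const messageData = {
  role: "assistant",
  content: "Here is your summary.",
  timestamp: "2024-01-01T12:00:00",
  usage: {
    input_tokens: 980,
    output_tokens: 212,
    total_tokens: 1192,
  },
};
```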