Streaming and tool calling in the UI

estevez.sebastian@gmail.com 2025-07-18 14:10:11 -04:00
parent 47e56d6893
commit cb99a3b7ec
5 changed files with 661 additions and 81 deletions


@@ -4,12 +4,22 @@ import { useState, useRef, useEffect } from "react"
import { Button } from "@/components/ui/button"
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"
import { Input } from "@/components/ui/input"
import { MessageCircle, Send, Loader2, User, Bot } from "lucide-react"
import { MessageCircle, Send, Loader2, User, Bot, Zap, Settings, ChevronDown, ChevronRight } from "lucide-react"
interface Message {
role: "user" | "assistant"
content: string
timestamp: Date
functionCalls?: FunctionCall[]
isStreaming?: boolean
}
interface FunctionCall {
name: string
arguments?: Record<string, unknown>
result?: Record<string, unknown>
status: "pending" | "completed" | "error"
argumentsString?: string
}
type EndpointType = "chat" | "langflow"
@@ -19,6 +29,13 @@ export default function ChatPage() {
const [input, setInput] = useState("")
const [loading, setLoading] = useState(false)
const [endpoint, setEndpoint] = useState<EndpointType>("chat")
const [asyncMode, setAsyncMode] = useState(false)
const [streamingMessage, setStreamingMessage] = useState<{
content: string
functionCalls: FunctionCall[]
timestamp: Date
} | null>(null)
const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<Set<string>>(new Set())
const messagesEndRef = useRef<HTMLDivElement>(null)
const scrollToBottom = () => {
@@ -27,7 +44,310 @@ export default function ChatPage() {
useEffect(() => {
scrollToBottom()
}, [messages])
}, [messages, streamingMessage])
const handleSSEStream = async (userMessage: Message) => {
const apiEndpoint = endpoint === "chat" ? "/api/chat" : "/api/langflow"
try {
const response = await fetch(apiEndpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
prompt: userMessage.content,
stream: true
}),
})
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`)
}
const reader = response.body?.getReader()
if (!reader) {
throw new Error("No reader available")
}
const decoder = new TextDecoder()
let buffer = ""
let currentContent = ""
const currentFunctionCalls: FunctionCall[] = []
// Initialize streaming message
setStreamingMessage({
content: "",
functionCalls: [],
timestamp: new Date()
})
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
buffer += decoder.decode(value, { stream: true })
// Process complete lines (JSON objects)
const lines = buffer.split('\n')
buffer = lines.pop() || "" // Keep incomplete line in buffer
for (const line of lines) {
if (line.trim()) {
try {
const chunk = JSON.parse(line)
console.log("Received chunk:", chunk.type || chunk.object, chunk)
// Handle OpenAI Chat Completions streaming format
if (chunk.object === "response.chunk" && chunk.delta) {
// Handle function calls in delta
if (chunk.delta.function_call) {
console.log("Function call in delta:", chunk.delta.function_call)
// Check if this is a new function call
if (chunk.delta.function_call.name) {
console.log("New function call:", chunk.delta.function_call.name)
const functionCall: FunctionCall = {
name: chunk.delta.function_call.name,
arguments: undefined,
status: "pending",
argumentsString: chunk.delta.function_call.arguments || ""
}
currentFunctionCalls.push(functionCall)
console.log("Added function call:", functionCall)
}
// Or if this is arguments continuation
else if (chunk.delta.function_call.arguments) {
console.log("Function call arguments delta:", chunk.delta.function_call.arguments)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall) {
if (!lastFunctionCall.argumentsString) {
lastFunctionCall.argumentsString = ""
}
lastFunctionCall.argumentsString += chunk.delta.function_call.arguments
console.log("Accumulated arguments:", lastFunctionCall.argumentsString)
// Try to parse arguments if they look complete
if (lastFunctionCall.argumentsString.includes("}")) {
try {
const parsed = JSON.parse(lastFunctionCall.argumentsString)
lastFunctionCall.arguments = parsed
lastFunctionCall.status = "completed"
console.log("Parsed function arguments:", parsed)
} catch (e) {
console.log("Arguments not yet complete or invalid JSON:", e)
}
}
}
}
}
// Handle tool calls in delta
else if (chunk.delta.tool_calls && Array.isArray(chunk.delta.tool_calls)) {
console.log("Tool calls in delta:", chunk.delta.tool_calls)
for (const toolCall of chunk.delta.tool_calls) {
if (toolCall.function) {
// Check if this is a new tool call
if (toolCall.function.name) {
console.log("New tool call:", toolCall.function.name)
const functionCall: FunctionCall = {
name: toolCall.function.name,
arguments: undefined,
status: "pending",
argumentsString: toolCall.function.arguments || ""
}
currentFunctionCalls.push(functionCall)
console.log("Added tool call:", functionCall)
}
// Or if this is arguments continuation
else if (toolCall.function.arguments) {
console.log("Tool call arguments delta:", toolCall.function.arguments)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall) {
if (!lastFunctionCall.argumentsString) {
lastFunctionCall.argumentsString = ""
}
lastFunctionCall.argumentsString += toolCall.function.arguments
console.log("Accumulated tool arguments:", lastFunctionCall.argumentsString)
// Try to parse arguments if they look complete
if (lastFunctionCall.argumentsString.includes("}")) {
try {
const parsed = JSON.parse(lastFunctionCall.argumentsString)
lastFunctionCall.arguments = parsed
lastFunctionCall.status = "completed"
console.log("Parsed tool arguments:", parsed)
} catch (e) {
console.log("Tool arguments not yet complete or invalid JSON:", e)
}
}
}
}
}
}
}
// Handle content/text in delta
else if (chunk.delta.content) {
console.log("Content delta:", chunk.delta.content)
currentContent += chunk.delta.content
}
// Handle finish reason
if (chunk.delta.finish_reason) {
console.log("Finish reason:", chunk.delta.finish_reason)
// Mark any pending function calls as completed
currentFunctionCalls.forEach(fc => {
if (fc.status === "pending" && fc.argumentsString) {
try {
fc.arguments = JSON.parse(fc.argumentsString)
fc.status = "completed"
console.log("Completed function call on finish:", fc)
} catch (e) {
fc.arguments = { raw: fc.argumentsString }
fc.status = "error"
console.log("Error parsing function call on finish:", fc, e)
}
}
})
}
}
// Handle Realtime API format (this is what you're actually getting!)
else if (chunk.type === "response.output_item.added" && chunk.item?.type === "function_call") {
console.log("Function call started (Realtime API):", chunk.item.name)
const functionCall: FunctionCall = {
name: chunk.item.name || "unknown",
arguments: undefined,
status: "pending",
argumentsString: ""
}
currentFunctionCalls.push(functionCall)
}
// Handle function call arguments streaming (Realtime API)
else if (chunk.type === "response.function_call_arguments.delta") {
console.log("Function args delta (Realtime API):", chunk.delta)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall) {
if (!lastFunctionCall.argumentsString) {
lastFunctionCall.argumentsString = ""
}
lastFunctionCall.argumentsString += chunk.delta || ""
console.log("Accumulated arguments (Realtime API):", lastFunctionCall.argumentsString)
}
}
// Handle function call arguments completion (Realtime API)
else if (chunk.type === "response.function_call_arguments.done") {
console.log("Function args done (Realtime API):", chunk.arguments)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall) {
try {
lastFunctionCall.arguments = JSON.parse(chunk.arguments || "{}")
lastFunctionCall.status = "completed"
console.log("Parsed function arguments (Realtime API):", lastFunctionCall.arguments)
} catch (e) {
lastFunctionCall.arguments = { raw: chunk.arguments }
lastFunctionCall.status = "error"
console.log("Error parsing function arguments (Realtime API):", e)
}
}
}
// Handle function call completion (Realtime API)
else if (chunk.type === "response.output_item.done" && chunk.item?.type === "function_call") {
console.log("Function call done (Realtime API):", chunk.item.status)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall) {
lastFunctionCall.status = chunk.item.status === "completed" ? "completed" : "error"
}
}
// Handle function call results
else if (chunk.type === "response.function_call.result" || chunk.type === "function_call_result") {
console.log("Function call result:", chunk.result || chunk)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall) {
lastFunctionCall.result = chunk.result || chunk.output || chunk.response
lastFunctionCall.status = "completed"
}
}
// Handle tool call results
else if (chunk.type === "response.tool_call.result" || chunk.type === "tool_call_result") {
console.log("Tool call result:", chunk.result || chunk)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall) {
lastFunctionCall.result = chunk.result || chunk.output || chunk.response
lastFunctionCall.status = "completed"
}
}
// Handle generic results that might be in different formats
else if ((chunk.type && chunk.type.includes("result")) || chunk.result) {
console.log("Generic result:", chunk)
const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1]
if (lastFunctionCall && !lastFunctionCall.result) {
lastFunctionCall.result = chunk.result || chunk.output || chunk.response || chunk
lastFunctionCall.status = "completed"
}
}
// Handle text output streaming (Realtime API)
else if (chunk.type === "response.output_text.delta") {
console.log("Text delta (Realtime API):", chunk.delta)
currentContent += chunk.delta || ""
}
// Log unhandled chunks
else if (chunk.type !== null && chunk.object !== "response.chunk") {
console.log("Unhandled chunk format:", chunk)
}
// Update streaming message
setStreamingMessage({
content: currentContent,
functionCalls: [...currentFunctionCalls],
timestamp: new Date()
})
} catch (parseError) {
console.warn("Failed to parse chunk:", line, parseError)
}
}
}
}
} finally {
reader.releaseLock()
}
// Finalize the message
const finalMessage: Message = {
role: "assistant",
content: currentContent,
functionCalls: currentFunctionCalls,
timestamp: new Date()
}
setMessages(prev => [...prev, finalMessage])
setStreamingMessage(null)
} catch (error) {
console.error("SSE Stream error:", error)
setStreamingMessage(null)
const errorMessage: Message = {
role: "assistant",
content: "Sorry, I couldn't connect to the chat service. Please try again.",
timestamp: new Date()
}
setMessages(prev => [...prev, errorMessage])
}
}
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault()
@@ -43,45 +363,127 @@ export default function ChatPage() {
setInput("")
setLoading(true)
try {
const apiEndpoint = endpoint === "chat" ? "/api/chat" : "/api/langflow"
const response = await fetch(apiEndpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ prompt: userMessage.content }),
})
if (asyncMode) {
await handleSSEStream(userMessage)
} else {
// Original non-streaming logic
try {
const apiEndpoint = endpoint === "chat" ? "/api/chat" : "/api/langflow"
const response = await fetch(apiEndpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ prompt: userMessage.content }),
})
const result = await response.json()
if (response.ok) {
const assistantMessage: Message = {
role: "assistant",
content: result.response,
timestamp: new Date()
const result = await response.json()
if (response.ok) {
const assistantMessage: Message = {
role: "assistant",
content: result.response,
timestamp: new Date()
}
setMessages(prev => [...prev, assistantMessage])
} else {
console.error("Chat failed:", result.error)
const errorMessage: Message = {
role: "assistant",
content: "Sorry, I encountered an error. Please try again.",
timestamp: new Date()
}
setMessages(prev => [...prev, errorMessage])
}
setMessages(prev => [...prev, assistantMessage])
} else {
console.error("Chat failed:", result.error)
} catch (error) {
console.error("Chat error:", error)
const errorMessage: Message = {
role: "assistant",
content: "Sorry, I encountered an error. Please try again.",
content: "Sorry, I couldn't connect to the chat service. Please try again.",
timestamp: new Date()
}
setMessages(prev => [...prev, errorMessage])
}
} catch (error) {
console.error("Chat error:", error)
const errorMessage: Message = {
role: "assistant",
content: "Sorry, I couldn't connect to the chat service. Please try again.",
timestamp: new Date()
}
setMessages(prev => [...prev, errorMessage])
} finally {
setLoading(false)
}
setLoading(false)
}
const toggleFunctionCall = (functionCallId: string) => {
setExpandedFunctionCalls(prev => {
const newSet = new Set(prev)
if (newSet.has(functionCallId)) {
newSet.delete(functionCallId)
} else {
newSet.add(functionCallId)
}
return newSet
})
}
const renderFunctionCalls = (functionCalls: FunctionCall[], messageIndex?: number) => {
if (!functionCalls || functionCalls.length === 0) return null
return (
<div className="mb-3 space-y-2">
{functionCalls.map((fc, index) => {
const functionCallId = `${messageIndex || 'streaming'}-${index}`
const isExpanded = expandedFunctionCalls.has(functionCallId)
return (
<div key={index} className="rounded-lg bg-blue-500/10 border border-blue-500/20 p-3">
<div
className="flex items-center gap-2 cursor-pointer hover:bg-blue-500/5 -m-3 p-3 rounded-lg transition-colors"
onClick={() => toggleFunctionCall(functionCallId)}
>
<Settings className="h-4 w-4 text-blue-400" />
<span className="text-sm font-medium text-blue-400 flex-1">
Function Call: {fc.name}
</span>
<div className={`px-2 py-1 rounded text-xs font-medium ${
fc.status === "completed" ? "bg-green-500/20 text-green-400" :
fc.status === "error" ? "bg-red-500/20 text-red-400" :
"bg-yellow-500/20 text-yellow-400"
}`}>
{fc.status}
</div>
{isExpanded ? (
<ChevronDown className="h-4 w-4 text-blue-400" />
) : (
<ChevronRight className="h-4 w-4 text-blue-400" />
)}
</div>
{isExpanded && (
<div className="mt-3 pt-3 border-t border-blue-500/20">
{/* Show arguments - either completed or streaming */}
{(fc.arguments || fc.argumentsString) && (
<div className="text-xs text-muted-foreground mb-3">
<span className="font-medium">Arguments:</span>
<pre className="mt-1 p-2 bg-muted/30 rounded text-xs overflow-x-auto">
{fc.arguments
? JSON.stringify(fc.arguments, null, 2)
: fc.argumentsString || "..."
}
</pre>
</div>
)}
{fc.result && (
<div className="text-xs text-muted-foreground">
<span className="font-medium">Result:</span>
<pre className="mt-1 p-2 bg-muted/30 rounded text-xs overflow-x-auto">
{JSON.stringify(fc.result, null, 2)}
</pre>
</div>
)}
</div>
)}
</div>
)
})}
</div>
)
}
return (
@@ -98,33 +500,57 @@ export default function ChatPage() {
<MessageCircle className="h-5 w-5" />
<CardTitle>Chat</CardTitle>
</div>
<div className="flex items-center gap-2 bg-muted/50 rounded-lg p-1">
<Button
variant={endpoint === "chat" ? "default" : "ghost"}
size="sm"
onClick={() => setEndpoint("chat")}
className="h-7 text-xs"
>
Chat
</Button>
<Button
variant={endpoint === "langflow" ? "default" : "ghost"}
size="sm"
onClick={() => setEndpoint("langflow")}
className="h-7 text-xs"
>
Langflow
</Button>
<div className="flex items-center gap-4">
{/* Async Mode Toggle */}
<div className="flex items-center gap-2 bg-muted/50 rounded-lg p-1">
<Button
variant={!asyncMode ? "default" : "ghost"}
size="sm"
onClick={() => setAsyncMode(false)}
className="h-7 text-xs"
>
Sync
</Button>
<Button
variant={asyncMode ? "default" : "ghost"}
size="sm"
onClick={() => setAsyncMode(true)}
className="h-7 text-xs"
>
<Zap className="h-3 w-3 mr-1" />
Async
</Button>
</div>
{/* Endpoint Toggle */}
<div className="flex items-center gap-2 bg-muted/50 rounded-lg p-1">
<Button
variant={endpoint === "chat" ? "default" : "ghost"}
size="sm"
onClick={() => setEndpoint("chat")}
className="h-7 text-xs"
>
Chat
</Button>
<Button
variant={endpoint === "langflow" ? "default" : "ghost"}
size="sm"
onClick={() => setEndpoint("langflow")}
className="h-7 text-xs"
>
Langflow
</Button>
</div>
</div>
</div>
<CardDescription>
Chat with AI about your indexed documents using {endpoint === "chat" ? "Chat" : "Langflow"} endpoint
Chat with AI about your indexed documents using {endpoint === "chat" ? "Chat" : "Langflow"} endpoint
{asyncMode ? " with real-time streaming" : ""}
</CardDescription>
</CardHeader>
<CardContent className="flex-1 flex flex-col gap-4 min-h-0">
{/* Messages Area */}
<div className="flex-1 overflow-y-auto overflow-x-hidden space-y-6 p-4 rounded-lg bg-muted/20 min-h-0">
{messages.length === 0 ? (
{messages.length === 0 && !streamingMessage ? (
<div className="flex items-center justify-center h-full text-muted-foreground">
<div className="text-center">
<MessageCircle className="h-12 w-12 mx-auto mb-4 opacity-50" />
@@ -168,6 +594,7 @@ export default function ChatPage() {
{message.timestamp.toLocaleTimeString()}
</span>
</div>
{renderFunctionCalls(message.functionCalls || [], index)}
<p className="text-foreground whitespace-pre-wrap break-words overflow-wrap-anywhere">{message.content}</p>
</div>
</div>
@@ -175,7 +602,37 @@ export default function ChatPage() {
)}
</div>
))}
{loading && (
{/* Streaming Message Display */}
{streamingMessage && (
<div className="space-y-2">
<div className="flex items-center gap-2">
<div className="w-8 h-8 rounded-lg bg-accent/20 flex items-center justify-center">
<Bot className="h-4 w-4 text-accent-foreground" />
</div>
<span className="font-medium text-foreground">AI</span>
<span className="text-sm text-muted-foreground">gpt-4.1</span>
</div>
<div className="pl-10 max-w-full">
<div className="rounded-lg bg-card border border-border/40 p-4 max-w-full overflow-hidden">
<div className="flex items-center gap-2 mb-2">
<Loader2 className="w-4 h-4 animate-spin text-blue-400" />
<span className="text-sm text-blue-400 font-medium">Streaming...</span>
<span className="text-xs text-muted-foreground ml-auto">
{streamingMessage.timestamp.toLocaleTimeString()}
</span>
</div>
{renderFunctionCalls(streamingMessage.functionCalls, messages.length)}
<p className="text-foreground whitespace-pre-wrap break-words overflow-wrap-anywhere">
{streamingMessage.content}
<span className="inline-block w-2 h-4 bg-blue-400 ml-1 animate-pulse"></span>
</p>
</div>
</div>
</div>
)}
{loading && !asyncMode && (
<div className="space-y-2">
<div className="flex items-center gap-2">
<div className="w-8 h-8 rounded-lg bg-accent/20 flex items-center justify-center">


@@ -5,7 +5,7 @@ description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"agentd>=0.2.0.post3",
"agentd>=0.2.1",
"aiofiles>=24.1.0",
"docling>=2.41.0",
"opensearch-py[async]>=3.0.0",


@@ -1,17 +1,118 @@
messages = [{"role": "system", "content": "You are a helpful assistant. Always use the search_tools to answer questions."}]
# Async version for web server
async def async_chat(async_client, prompt: str, model: str = "gpt-4.1-mini", previous_response_id: str = None) -> str:
# Generic async response function for streaming
async def async_response_stream(client, prompt: str, model: str, previous_response_id: str = None, log_prefix: str = "response"):
print(f"user ==> {prompt}")
try:
# Build request parameters
request_params = {
"model": model,
"input": prompt,
"stream": True
}
if previous_response_id is not None:
request_params["previous_response_id"] = previous_response_id
response = await client.responses.create(**request_params)
full_response = ""
chunk_count = 0
async for chunk in response:
chunk_count += 1
print(f"[DEBUG] Chunk {chunk_count}: {chunk}")
# Yield the raw event as JSON for the UI to process
import json
# Also extract text content for logging
if hasattr(chunk, 'output_text') and chunk.output_text:
full_response += chunk.output_text
elif hasattr(chunk, 'delta') and chunk.delta:
# Handle delta properly - it might be a dict or string
if isinstance(chunk.delta, dict):
delta_text = chunk.delta.get('content', '') or chunk.delta.get('text', '') or str(chunk.delta)
else:
delta_text = str(chunk.delta)
full_response += delta_text
# Send the raw event as JSON followed by newline for easy parsing
try:
# Try to serialize the chunk object
if hasattr(chunk, 'model_dump'):
# Pydantic model
chunk_data = chunk.model_dump()
elif hasattr(chunk, '__dict__'):
chunk_data = chunk.__dict__
else:
chunk_data = str(chunk)
yield (json.dumps(chunk_data, default=str) + '\n').encode('utf-8')
except Exception as e:
# Fallback to string representation
print(f"[DEBUG] JSON serialization failed: {e}")
yield (json.dumps({"error": f"Serialization failed: {e}", "raw": str(chunk)}) + '\n').encode('utf-8')
print(f"[DEBUG] Stream complete. Total chunks: {chunk_count}")
print(f"{log_prefix} ==> {full_response}")
except Exception as e:
print(f"[ERROR] Exception in streaming: {e}")
import traceback
traceback.print_exc()
raise
# Generic async response function for non-streaming
async def async_response(client, prompt: str, model: str, previous_response_id: str = None, log_prefix: str = "response"):
print(f"user ==> {prompt}")
# Build request parameters
request_params = {
"model": model,
"input": prompt,
"stream": False
}
if previous_response_id is not None:
request_params["previous_response_id"] = previous_response_id
response = await client.responses.create(**request_params)
response_text = response.output_text
print(f"{log_prefix} ==> {response_text}")
return response_text
# Unified streaming function for both chat and langflow
async def async_stream(client, prompt: str, model: str, previous_response_id: str = None, log_prefix: str = "response"):
async for chunk in async_response_stream(client, prompt, model, previous_response_id=previous_response_id, log_prefix=log_prefix):
yield chunk
# Async langflow function (non-streaming only)
async def async_langflow(langflow_client, flow_id: str, prompt: str):
return await async_response(langflow_client, prompt, flow_id, log_prefix="langflow")
# Async langflow function for streaming (alias for compatibility)
async def async_langflow_stream(langflow_client, flow_id: str, prompt: str):
print(f"[DEBUG] Starting langflow stream for prompt: {prompt}")
try:
async for chunk in async_stream(langflow_client, prompt, flow_id, log_prefix="langflow"):
print(f"[DEBUG] Yielding chunk from langflow_stream: {chunk[:100]}...")
yield chunk
print(f"[DEBUG] Langflow stream completed")
except Exception as e:
print(f"[ERROR] Exception in langflow_stream: {e}")
import traceback
traceback.print_exc()
raise
# Async chat function (non-streaming only)
async def async_chat(async_client, prompt: str, model: str = "gpt-4.1-mini", previous_response_id: str = None):
global messages
messages += [{"role": "user", "content": prompt}]
response = await async_client.responses.create(
model=model,
input=prompt,
previous_response_id=previous_response_id,
)
return await async_response(async_client, prompt, model, previous_response_id=previous_response_id, log_prefix="agent")
response_id = response.id
response_text = response.output_text
print(f"user ==> {prompt}")
print(f"agent ==> {response_text}")
return response_text
# Async chat function for streaming (alias for compatibility)
async def async_chat_stream(async_client, prompt: str, model: str = "gpt-4.1-mini", previous_response_id: str = None):
global messages
messages += [{"role": "user", "content": prompt}]
async for chunk in async_stream(async_client, prompt, model, previous_response_id=previous_response_id, log_prefix="agent"):
yield chunk


@@ -4,7 +4,7 @@ import os
from collections import defaultdict
from typing import Any
from agent import async_chat
from agent import async_chat, async_langflow
os.environ['USE_CPU_ONLY'] = 'true'
@@ -14,7 +14,7 @@ import asyncio
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette.responses import JSONResponse, StreamingResponse
from starlette.routing import Route
import aiofiles
@@ -305,16 +305,31 @@ async def search_tool(query: str)-> dict[str, Any]:
async def chat_endpoint(request):
data = await request.json()
prompt = data.get("prompt", "")
stream = data.get("stream", False)
if not prompt:
return JSONResponse({"error": "Prompt is required"}, status_code=400)
response = await async_chat(patched_async_client, prompt)
return JSONResponse({"response": response})
if stream:
from agent import async_chat_stream
return StreamingResponse(
async_chat_stream(patched_async_client, prompt),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Headers": "Cache-Control"
}
)
else:
response = await async_chat(patched_async_client, prompt)
return JSONResponse({"response": response})
async def langflow_endpoint(request):
data = await request.json()
prompt = data.get("prompt", "")
stream = data.get("stream", False)
if not prompt:
return JSONResponse({"error": "Prompt is required"}, status_code=400)
@@ -323,14 +338,21 @@ async def langflow_endpoint(request):
return JSONResponse({"error": "LANGFLOW_URL, FLOW_ID, and LANGFLOW_KEY environment variables are required"}, status_code=500)
try:
response = await langflow_client.responses.create(
model=flow_id,
input=prompt
)
response_text = response.output_text
return JSONResponse({"response": response_text})
if stream:
from agent import async_langflow_stream
return StreamingResponse(
async_langflow_stream(langflow_client, flow_id, prompt),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Headers": "Cache-Control"
}
)
else:
response = await async_langflow(langflow_client, flow_id, prompt)
return JSONResponse({"response": response})
except Exception as e:
return JSONResponse({"error": f"Langflow request failed: {str(e)}"}, status_code=500)

uv.lock (generated)

@@ -9,7 +9,7 @@ resolution-markers = [
[[package]]
name = "agentd"
version = "0.2.0.post3"
version = "0.2.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "litellm" },
@@ -18,9 +18,9 @@ dependencies = [
{ name = "openai-agents" },
{ name = "pyyaml" },
]
sdist = { url = "https://files.pythonhosted.org/packages/6e/89/2bc397c80764d6acfeb6de7ac6d3ce8914f6f37f57307d1e2b6f7e8b0923/agentd-0.2.0.post3.tar.gz", hash = "sha256:765cb51798791eed32687b44305b20dd4130990471f0f1914afa2b292d09cb5e", size = 114530, upload-time = "2025-07-16T06:13:11.423Z" }
sdist = { url = "https://files.pythonhosted.org/packages/ea/ef/8c96f9699ec99b2e60b4f2fbbc8ae3693ff583eda2835b3244d3aaaf68b3/agentd-0.2.1.tar.gz", hash = "sha256:96c4a5efc1a4ee3ee0a1ce68ade05e8b51e6a0171b0d64fadac369744a847240", size = 118826, upload-time = "2025-07-17T19:47:58.91Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8c/f8/ced474722557f11e0a2f7e691371cf25c6a823507cc297971baa71bfbaac/agentd-0.2.0.post3-py3-none-any.whl", hash = "sha256:d05c6123a33d9f0b466fba4f7b378618c352ca367c65cd2c5a54f867af7b3cff", size = 13299, upload-time = "2025-07-16T06:13:10.034Z" },
{ url = "https://files.pythonhosted.org/packages/c9/9b/1e695c083227550eb85a9544a6a7c28e11d1dfb893e9d22035a0ad1b7fa3/agentd-0.2.1-py3-none-any.whl", hash = "sha256:b35da9c3557b5784c301c327c2831c6dfcfc74bf84fa8428585f2dffecdb2904", size = 15833, upload-time = "2025-07-17T19:47:57.749Z" },
]
[[package]]
@@ -434,7 +434,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "agentd", specifier = ">=0.2.0.post3" },
{ name = "agentd", specifier = ">=0.2.1" },
{ name = "aiofiles", specifier = ">=24.1.0" },
{ name = "docling", specifier = ">=2.41.0" },
{ name = "opensearch-py", extras = ["async"], specifier = ">=3.0.0" },