From 9528e2f1857697c69e0fbd738a58235b88e94480 Mon Sep 17 00:00:00 2001
From: cristhianzl
Date: Wed, 3 Sep 2025 10:34:45 -0300
Subject: [PATCH 01/32] add chat history

---
 .env.example                             |   10 +-
 frontend/components/navigation.tsx       |   10 +-
 frontend/src/app/chat/page.tsx           | 2246 +++++++++++++---
 src/services/auth_service.py             |   35 +-
 src/services/chat_service.py             |  123 +-
 src/services/langflow_history_service.py |  310 +++
 src/services/user_binding_service.py     |  256 +++
 7 files changed, 2022 insertions(+), 968 deletions(-)
 create mode 100644 src/services/langflow_history_service.py
 create mode 100644 src/services/user_binding_service.py

diff --git a/.env.example b/.env.example
index a1fd6326..fc91b8a3 100644
--- a/.env.example
+++ b/.env.example
@@ -1,12 +1,12 @@
 # make one like so https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
 LANGFLOW_SECRET_KEY=
-# flow id from the the openrag flow json
+# flow id from the openrag flow json (add /flows/openrag_agent.json to your canvas and copy the flow id from the url)
 FLOW_ID=1098eea1-6649-4e1d-aed1-b77249fb8dd0
 # must match the hashed password in secureconfig, must change for secure deployment!!!
 OPENSEARCH_PASSWORD=OSisgendb1!
 # make here https://console.cloud.google.com/apis/credentials
 GOOGLE_OAUTH_CLIENT_ID=
 GOOGLE_OAUTH_CLIENT_SECRET=
 # Azure app registration credentials for SharePoint/OneDrive
 MICROSOFT_GRAPH_OAUTH_CLIENT_ID=
 MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=
@@ -20,3 +20,7 @@ AWS_SECRET_ACCESS_KEY=
 
 # OPTIONAL url for openrag link to langflow in the UI
 LANGFLOW_PUBLIC_URL=
+
+# set AUTO_LOGIN=False in .env so Langflow uses these superuser credentials
+LANGFLOW_SUPERUSER=langflow
+LANGFLOW_SUPERUSER_PASSWORD=langflow
\ No newline at end of file
diff --git a/frontend/components/navigation.tsx b/frontend/components/navigation.tsx
index 6581ab68..7419a25a 100644
--- a/frontend/components/navigation.tsx
+++ b/frontend/components/navigation.tsx
@@ -85,6 +85,14 @@ export function Navigation() {
       if (!response.ok) {
         const errorText = await response.text()
         console.error("Upload failed:", errorText)
+
+        // Trigger error event for chat page to handle
+        window.dispatchEvent(new CustomEvent('fileUploadError', {
+          detail: { filename: file.name, error: 'Failed to process document' }
+        }))
+
+        // Trigger loading end event
+        window.dispatchEvent(new CustomEvent('fileUploadComplete'))
         return
       }
 
@@ -111,7 +119,7 @@ export function Navigation() {
       // Trigger error event for chat page to handle
       window.dispatchEvent(new CustomEvent('fileUploadError', {
-        detail: { filename: file.name, error: error instanceof Error ? error.message : 'Unknown error' }
+        detail: { filename: file.name, error: 'Failed to process document' }
       }))
     }
   }
diff --git a/frontend/src/app/chat/page.tsx b/frontend/src/app/chat/page.tsx
index dca8084e..100228ea 100644
--- a/frontend/src/app/chat/page.tsx
+++ b/frontend/src/app/chat/page.tsx
@@ -1,285 +1,319 @@
-"use client"
-
-import { useState, useRef, useEffect } from "react"
-import { Button } from "@/components/ui/button"
-import { Loader2, User, Bot, Zap, Settings, ChevronDown, ChevronRight, Upload, AtSign, Plus, X, GitBranch } from "lucide-react"
-import { ProtectedRoute } from "@/components/protected-route"
-import { useTask } from "@/contexts/task-context"
-import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context"
-import { useAuth } from "@/contexts/auth-context"
-import { useChat, EndpointType } from "@/contexts/chat-context"
-import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"
+"use client";
+import { ProtectedRoute } from "@/components/protected-route";
+import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar";
+import { Button } from "@/components/ui/button";
+import { useAuth } from "@/contexts/auth-context";
+import { EndpointType, useChat } from "@/contexts/chat-context";
+import { useKnowledgeFilter } from "@/contexts/knowledge-filter-context";
+import { useTask } from "@/contexts/task-context";
+import {
+  AtSign,
+  Bot,
+  ChevronDown,
+  ChevronRight,
+  GitBranch,
+  Loader2,
+  Plus,
+  Settings,
+  Upload,
+  User,
+  X,
+  Zap,
+} from "lucide-react";
+import { useEffect, useRef, useState } from "react";
 
 interface Message {
-  role: "user" | "assistant"
-  content: string
-  timestamp: Date
-  functionCalls?: FunctionCall[]
-  isStreaming?: boolean
+  role: "user" | "assistant";
+  content: string;
+  timestamp: Date;
+  functionCalls?: FunctionCall[];
+  isStreaming?: boolean;
 }
 
 interface FunctionCall {
-  name: string
-  arguments?: Record<string, unknown>
-  result?: Record<string, unknown> | ToolCallResult[]
-  status: "pending" | "completed" | "error"
-  argumentsString?: string
-  id?: string
-  type?: string
+  name: string;
+  arguments?: Record<string, unknown>;
+  result?: Record<string, unknown> | ToolCallResult[];
+  status: "pending" | "completed" | "error";
+  argumentsString?: string;
+  id?: string;
+  type?: string;
 }
 
 interface ToolCallResult {
-  text_key?: string
+  text_key?: string;
   data?: {
-    file_path?: string
-    text?: string
-    [key: string]: unknown
-  }
-  default_value?: string
-  [key: string]: unknown
+    file_path?: string;
+    text?: string;
+    [key: string]: unknown;
+  };
+  default_value?: string;
+  [key: string]: unknown;
 }
-
-
 
 interface SelectedFilters {
-  data_sources: string[]
-  document_types: string[]
-  owners: string[]
+  data_sources: string[];
+  document_types: string[];
+  owners: string[];
 }
 
 interface KnowledgeFilterData {
-  id: string
-  name: string
-  description: string
-  query_data: string
-  owner: string
-  created_at: string
-  updated_at: string
+  id: string;
+  name: string;
+  description: string;
+  query_data: string;
+  owner: string;
+  created_at: string;
+  updated_at: string;
 }
 
 interface RequestBody {
-  prompt: string
-  stream?: boolean
-  previous_response_id?: string
-  filters?: SelectedFilters
-  limit?: number
-  scoreThreshold?: number
+  prompt: string;
+  stream?: boolean;
+  previous_response_id?: string;
+  filters?: SelectedFilters;
+  limit?: number;
+  scoreThreshold?: number;
 }
 
 function ChatPage() {
-  const isDebugMode = process.env.NODE_ENV === 'development' || process.env.NEXT_PUBLIC_OPENRAG_DEBUG === 'true'
-  const { user } = useAuth()
-  const { endpoint, setEndpoint, currentConversationId, conversationData, setCurrentConversationId, addConversationDoc, forkFromResponse, refreshConversations, previousResponseIds, setPreviousResponseIds } = useChat()
+  const isDebugMode =
+    process.env.NODE_ENV === "development" ||
+    process.env.NEXT_PUBLIC_OPENRAG_DEBUG === "true";
+  const { user } = useAuth();
+  const {
+    endpoint,
+    setEndpoint,
+    currentConversationId,
+    conversationData,
+    setCurrentConversationId,
+    addConversationDoc,
+    forkFromResponse,
+    refreshConversations,
+    previousResponseIds,
+    setPreviousResponseIds,
+  } = useChat();
   const [messages, setMessages] = useState<Message[]>([
     {
       role: "assistant",
       content: "How can I assist?",
-      timestamp: new Date()
-    }
-  ])
-  const [input, setInput] = useState("")
-  const [loading, setLoading] = useState(false)
-  const [asyncMode, setAsyncMode] = useState(true)
+      timestamp: new Date(),
+    },
+  ]);
+  const [input, setInput] = useState("");
+  const [loading, setLoading] = useState(false);
+  const [asyncMode, setAsyncMode] = useState(true);
   const [streamingMessage, setStreamingMessage] = useState<{
-    content: string
-    functionCalls: FunctionCall[]
-    timestamp: Date
-  } | null>(null)
-  const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<Set<string>>(new Set())
+    content: string;
+    functionCalls: FunctionCall[];
+    timestamp: Date;
+  } | null>(null);
+  const [expandedFunctionCalls, setExpandedFunctionCalls] = useState<
+    Set<string>
+  >(new Set());
   // previousResponseIds now comes from useChat context
-  const [isUploading, setIsUploading] = useState(false)
-  const [isDragOver, setIsDragOver] = useState(false)
-  const [isFilterDropdownOpen, setIsFilterDropdownOpen] = useState(false)
-  const [availableFilters, setAvailableFilters] = useState<KnowledgeFilterData[]>([])
-  const [filterSearchTerm, setFilterSearchTerm] = useState("")
-  const [selectedFilterIndex, setSelectedFilterIndex] = useState(0)
-  const [isFilterHighlighted, setIsFilterHighlighted] = useState(false)
-  const [dropdownDismissed, setDropdownDismissed] = useState(false)
-  const [isUserInteracting, setIsUserInteracting] = useState(false)
-  const [isForkingInProgress, setIsForkingInProgress] = useState(false)
-  const [lastForkTimestamp, setLastForkTimestamp] = useState(0)
-  const dragCounterRef = useRef(0)
-  const messagesEndRef = useRef<HTMLDivElement>(null)
-  const inputRef = useRef<HTMLInputElement>(null)
-  const fileInputRef = useRef<HTMLInputElement>(null)
-  const dropdownRef = useRef<HTMLDivElement>(null)
-  const streamAbortRef = useRef(null)
-  const streamIdRef = useRef(0)
-  const { addTask, isMenuOpen } = useTask()
-  const { selectedFilter, parsedFilterData, isPanelOpen, setSelectedFilter } = useKnowledgeFilter()
-
-
+  const [isUploading, setIsUploading] = useState(false);
+  const [isDragOver, setIsDragOver] = useState(false);
+  const [isFilterDropdownOpen, setIsFilterDropdownOpen] = useState(false);
+  const [availableFilters, setAvailableFilters] = useState<
+    KnowledgeFilterData[]
+  >([]);
+  const [filterSearchTerm, setFilterSearchTerm] = useState("");
+  const [selectedFilterIndex, setSelectedFilterIndex] = useState(0);
+  const [isFilterHighlighted, setIsFilterHighlighted] = useState(false);
+  const [dropdownDismissed, setDropdownDismissed] = useState(false);
+  const [isUserInteracting, setIsUserInteracting] = useState(false);
+  const [isForkingInProgress, setIsForkingInProgress] = useState(false);
+  const [lastForkTimestamp, setLastForkTimestamp] = useState(0);
+  const dragCounterRef = useRef(0);
+  const messagesEndRef = useRef<HTMLDivElement>(null);
+  const inputRef = useRef<HTMLInputElement>(null);
+  const fileInputRef = useRef<HTMLInputElement>(null);
+  const dropdownRef = useRef<HTMLDivElement>(null);
+ const streamAbortRef = useRef(null); + const streamIdRef = useRef(0); + const { addTask, isMenuOpen } = useTask(); + const { selectedFilter, parsedFilterData, isPanelOpen, setSelectedFilter } = + useKnowledgeFilter(); const scrollToBottom = () => { - messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }) - } + messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); + }; const handleEndpointChange = (newEndpoint: EndpointType) => { - setEndpoint(newEndpoint) + setEndpoint(newEndpoint); // Clear the conversation when switching endpoints to avoid response ID conflicts - setMessages([]) - setPreviousResponseIds({ chat: null, langflow: null }) - } + setMessages([]); + setPreviousResponseIds({ chat: null, langflow: null }); + }; const handleFileUpload = async (file: File) => { - console.log("handleFileUpload called with file:", file.name) - - if (isUploading) return - - setIsUploading(true) - setLoading(true) - + console.log("handleFileUpload called with file:", file.name); + + if (isUploading) return; + + setIsUploading(true); + setLoading(true); + // Add initial upload message const uploadStartMessage: Message = { - role: "assistant", + role: "assistant", content: `🔄 Starting upload of **${file.name}**...`, - timestamp: new Date() - } - setMessages(prev => [...prev, uploadStartMessage]) - + timestamp: new Date(), + }; + setMessages((prev) => [...prev, uploadStartMessage]); + try { - const formData = new FormData() - formData.append('file', file) - formData.append('endpoint', endpoint) - + const formData = new FormData(); + formData.append("file", file); + formData.append("endpoint", endpoint); + // Add previous_response_id if we have one for this endpoint - const currentResponseId = previousResponseIds[endpoint] + const currentResponseId = previousResponseIds[endpoint]; if (currentResponseId) { - formData.append('previous_response_id', currentResponseId) + formData.append("previous_response_id", currentResponseId); } - - const response = await fetch('/api/upload_context', { - method: 'POST', + + const response = await fetch("/api/upload_context", { + method: "POST", body: formData, - }) - - console.log("Upload response status:", response.status) - + }); + + console.log("Upload response status:", response.status); + if (!response.ok) { - const errorText = await response.text() - console.error("Upload failed with status:", response.status, "Response:", errorText) - throw new Error(`Upload failed: ${response.status} - ${errorText}`) + const errorText = await response.text(); + console.error( + "Upload failed with status:", + response.status, + "Response:", + errorText + ); + throw new Error("Failed to process document"); } - - const result = await response.json() - console.log("Upload result:", result) - + + const result = await response.json(); + console.log("Upload result:", result); + if (response.status === 201) { // New flow: Got task ID, start tracking with centralized system - const taskId = result.task_id || result.id - + const taskId = result.task_id || result.id; + if (!taskId) { - console.error("No task ID in 201 response:", result) - throw new Error("No task ID received from server") + console.error("No task ID in 201 response:", result); + throw new Error("No task ID received from server"); } - + // Add task to centralized tracking - addTask(taskId) - + addTask(taskId); + // Update message to show task is being tracked const pollingMessage: Message = { role: "assistant", content: `⏳ Upload initiated for **${file.name}**. Processing in background... 
(Task ID: ${taskId})`, - timestamp: new Date() - } - setMessages(prev => [...prev.slice(0, -1), pollingMessage]) - + timestamp: new Date(), + }; + setMessages((prev) => [...prev.slice(0, -1), pollingMessage]); } else if (response.ok) { - // Original flow: Direct response - + // Original flow: Direct response + const uploadMessage: Message = { role: "assistant", - content: `📄 Document uploaded: **${result.filename}** (${result.pages} pages, ${result.content_length.toLocaleString()} characters)\n\n${result.confirmation}`, - timestamp: new Date() - } - - setMessages(prev => [...prev.slice(0, -1), uploadMessage]) - + content: `📄 Document uploaded: **${result.filename}** (${ + result.pages + } pages, ${result.content_length.toLocaleString()} characters)\n\n${ + result.confirmation + }`, + timestamp: new Date(), + }; + + setMessages((prev) => [...prev.slice(0, -1), uploadMessage]); + // Add file to conversation docs if (result.filename) { - addConversationDoc(result.filename) + addConversationDoc(result.filename); } - + // Update the response ID for this endpoint if (result.response_id) { - setPreviousResponseIds(prev => ({ + setPreviousResponseIds((prev) => ({ ...prev, - [endpoint]: result.response_id - })) + [endpoint]: result.response_id, + })); } // Sidebar should show this conversation after upload creates it - try { refreshConversations() } catch {} - + try { + refreshConversations(); + } catch {} } else { - throw new Error(`Upload failed: ${response.status}`) + throw new Error(`Upload failed: ${response.status}`); } - } catch (error) { - console.error('Upload failed:', error) + console.error("Upload failed:", error); const errorMessage: Message = { role: "assistant", - content: `❌ Upload failed: ${error instanceof Error ? error.message : 'Unknown error'}`, - timestamp: new Date() - } - setMessages(prev => [...prev.slice(0, -1), errorMessage]) + content: `❌ Failed to process document. 
Please try again.`, + timestamp: new Date(), + }; + setMessages((prev) => [...prev.slice(0, -1), errorMessage]); } finally { - setIsUploading(false) - setLoading(false) + setIsUploading(false); + setLoading(false); } - } + }; // Remove the old pollTaskStatus function since we're using centralized system const handleDragEnter = (e: React.DragEvent) => { - e.preventDefault() - e.stopPropagation() - dragCounterRef.current++ + e.preventDefault(); + e.stopPropagation(); + dragCounterRef.current++; if (dragCounterRef.current === 1) { - setIsDragOver(true) + setIsDragOver(true); } - } - + }; + const handleDragOver = (e: React.DragEvent) => { - e.preventDefault() - e.stopPropagation() - } - + e.preventDefault(); + e.stopPropagation(); + }; + const handleDragLeave = (e: React.DragEvent) => { - e.preventDefault() - e.stopPropagation() - dragCounterRef.current-- + e.preventDefault(); + e.stopPropagation(); + dragCounterRef.current--; if (dragCounterRef.current === 0) { - setIsDragOver(false) + setIsDragOver(false); } - } - + }; + const handleDrop = (e: React.DragEvent) => { - e.preventDefault() - e.stopPropagation() - dragCounterRef.current = 0 - setIsDragOver(false) - - const files = Array.from(e.dataTransfer.files) + e.preventDefault(); + e.stopPropagation(); + dragCounterRef.current = 0; + setIsDragOver(false); + + const files = Array.from(e.dataTransfer.files); if (files.length > 0) { - handleFileUpload(files[0]) // Upload first file only + handleFileUpload(files[0]); // Upload first file only } - } + }; const handleFilePickerClick = () => { - fileInputRef.current?.click() - } + fileInputRef.current?.click(); + }; const handleFilePickerChange = (e: React.ChangeEvent) => { - const files = e.target.files + const files = e.target.files; if (files && files.length > 0) { - handleFileUpload(files[0]) + handleFileUpload(files[0]); } // Reset the input so the same file can be selected again if (fileInputRef.current) { - fileInputRef.current.value = '' + fileInputRef.current.value = ""; } - } + }; const loadAvailableFilters = async () => { try { @@ -290,74 +324,74 @@ function ChatPage() { }, body: JSON.stringify({ query: "", - limit: 20 + limit: 20, }), - }) + }); - const result = await response.json() + const result = await response.json(); if (response.ok && result.success) { - setAvailableFilters(result.filters) + setAvailableFilters(result.filters); } else { - console.error("Failed to load knowledge filters:", result.error) - setAvailableFilters([]) + console.error("Failed to load knowledge filters:", result.error); + setAvailableFilters([]); } } catch (error) { - console.error('Failed to load knowledge filters:', error) - setAvailableFilters([]) + console.error("Failed to load knowledge filters:", error); + setAvailableFilters([]); } - } + }; const handleFilterDropdownToggle = () => { if (!isFilterDropdownOpen) { - loadAvailableFilters() + loadAvailableFilters(); } - setIsFilterDropdownOpen(!isFilterDropdownOpen) - } + setIsFilterDropdownOpen(!isFilterDropdownOpen); + }; const handleFilterSelect = (filter: KnowledgeFilterData | null) => { - setSelectedFilter(filter) - setIsFilterDropdownOpen(false) - setFilterSearchTerm("") - setIsFilterHighlighted(false) - + setSelectedFilter(filter); + setIsFilterDropdownOpen(false); + setFilterSearchTerm(""); + setIsFilterHighlighted(false); + // Remove the @searchTerm from the input and replace with filter pill - const words = input.split(' ') - const lastWord = words[words.length - 1] - - if (lastWord.startsWith('@')) { + const words = input.split(" "); + const 
lastWord = words[words.length - 1]; + + if (lastWord.startsWith("@")) { // Remove the @search term - words.pop() - setInput(words.join(' ') + (words.length > 0 ? ' ' : '')) + words.pop(); + setInput(words.join(" ") + (words.length > 0 ? " " : "")); } - } + }; useEffect(() => { // Only auto-scroll if not in the middle of user interaction if (!isUserInteracting) { const timer = setTimeout(() => { - scrollToBottom() - }, 50) // Small delay to avoid conflicts with click events - - return () => clearTimeout(timer) + scrollToBottom(); + }, 50); // Small delay to avoid conflicts with click events + + return () => clearTimeout(timer); } - }, [messages, streamingMessage, isUserInteracting]) + }, [messages, streamingMessage, isUserInteracting]); // Reset selected index when search term changes useEffect(() => { - setSelectedFilterIndex(0) - }, [filterSearchTerm]) + setSelectedFilterIndex(0); + }, [filterSearchTerm]); // Auto-focus the input on component mount useEffect(() => { - inputRef.current?.focus() - }, []) + inputRef.current?.focus(); + }, []); // Explicitly handle external new conversation trigger useEffect(() => { const handleNewConversation = () => { // Abort any in-flight streaming so it doesn't bleed into new chat if (streamAbortRef.current) { - streamAbortRef.current.abort() + streamAbortRef.current.abort(); } // Reset chat UI even if context state was already 'new' setMessages([ @@ -366,212 +400,273 @@ function ChatPage() { content: "How can I assist?", timestamp: new Date(), }, - ]) - setInput("") - setStreamingMessage(null) - setExpandedFunctionCalls(new Set()) - setIsFilterHighlighted(false) - setLoading(false) - } + ]); + setInput(""); + setStreamingMessage(null); + setExpandedFunctionCalls(new Set()); + setIsFilterHighlighted(false); + setLoading(false); + }; const handleFocusInput = () => { - inputRef.current?.focus() - } + inputRef.current?.focus(); + }; - window.addEventListener('newConversation', handleNewConversation) - window.addEventListener('focusInput', handleFocusInput) + window.addEventListener("newConversation", handleNewConversation); + window.addEventListener("focusInput", handleFocusInput); return () => { - window.removeEventListener('newConversation', handleNewConversation) - window.removeEventListener('focusInput', handleFocusInput) - } - }, []) + window.removeEventListener("newConversation", handleNewConversation); + window.removeEventListener("focusInput", handleFocusInput); + }; + }, []); // Load conversation when conversationData changes useEffect(() => { - const now = Date.now() - + const now = Date.now(); + // Don't reset messages if user is in the middle of an interaction (like forking) if (isUserInteracting || isForkingInProgress) { - console.log("Skipping conversation load due to user interaction or forking") - return + console.log( + "Skipping conversation load due to user interaction or forking" + ); + return; } - + // Don't reload if we just forked recently (within 1 second) if (now - lastForkTimestamp < 1000) { - console.log("Skipping conversation load - recent fork detected") - return + console.log("Skipping conversation load - recent fork detected"); + return; } - + if (conversationData && conversationData.messages) { - console.log("Loading conversation with", conversationData.messages.length, "messages") + console.log( + "Loading conversation with", + conversationData.messages.length, + "messages" + ); // Convert backend message format to frontend Message interface - const convertedMessages: Message[] = conversationData.messages.map((msg: { - role: 
string; - content: string; - timestamp?: string; - response_id?: string; - }) => ({ - role: msg.role as "user" | "assistant", - content: msg.content, - timestamp: new Date(msg.timestamp || new Date()), - // Add any other necessary properties - })) - - setMessages(convertedMessages) - + const convertedMessages: Message[] = conversationData.messages.map( + (msg: { + role: string; + content: string; + timestamp?: string; + response_id?: string; + }) => ({ + role: msg.role as "user" | "assistant", + content: msg.content, + timestamp: new Date(msg.timestamp || new Date()), + // Add any other necessary properties + }) + ); + + setMessages(convertedMessages); + // Set the previous response ID for this conversation - setPreviousResponseIds(prev => ({ + setPreviousResponseIds((prev) => ({ ...prev, - [conversationData.endpoint]: conversationData.response_id - })) + [conversationData.endpoint]: conversationData.response_id, + })); } // Reset messages when starting a new conversation (but not during forking) - else if (currentConversationId === null && !isUserInteracting && !isForkingInProgress && now - lastForkTimestamp > 1000) { - console.log("Resetting to default message for new conversation") + else if ( + currentConversationId === null && + !isUserInteracting && + !isForkingInProgress && + now - lastForkTimestamp > 1000 + ) { + console.log("Resetting to default message for new conversation"); setMessages([ { role: "assistant", content: "How can I assist?", - timestamp: new Date() - } - ]) + timestamp: new Date(), + }, + ]); } - }, [conversationData, currentConversationId, isUserInteracting, isForkingInProgress, lastForkTimestamp, setPreviousResponseIds]) + }, [ + conversationData, + currentConversationId, + isUserInteracting, + isForkingInProgress, + lastForkTimestamp, + setPreviousResponseIds, + ]); // Listen for file upload events from navigation useEffect(() => { const handleFileUploadStart = (event: CustomEvent) => { - const { filename } = event.detail - console.log("Chat page received file upload start event:", filename) - - setLoading(true) - setIsUploading(true) - + const { filename } = event.detail; + console.log("Chat page received file upload start event:", filename); + + setLoading(true); + setIsUploading(true); + // Add initial upload message const uploadStartMessage: Message = { - role: "assistant", + role: "assistant", content: `🔄 Starting upload of **${filename}**...`, - timestamp: new Date() - } - setMessages(prev => [...prev, uploadStartMessage]) - } + timestamp: new Date(), + }; + setMessages((prev) => [...prev, uploadStartMessage]); + }; const handleFileUploaded = (event: CustomEvent) => { - const { result } = event.detail - console.log("Chat page received file upload event:", result) - + const { result } = event.detail; + console.log("Chat page received file upload event:", result); + // Replace the last message with upload complete message const uploadMessage: Message = { role: "assistant", - content: `📄 Document uploaded: **${result.filename}** (${result.pages} pages, ${result.content_length.toLocaleString()} characters)\n\n${result.confirmation}`, - timestamp: new Date() - } - - setMessages(prev => [...prev.slice(0, -1), uploadMessage]) - + content: `📄 Document uploaded: **${result.filename}** (${ + result.pages + } pages, ${result.content_length.toLocaleString()} characters)\n\n${ + result.confirmation + }`, + timestamp: new Date(), + }; + + setMessages((prev) => [...prev.slice(0, -1), uploadMessage]); + // Update the response ID for this endpoint if (result.response_id) { 
- setPreviousResponseIds(prev => ({ + setPreviousResponseIds((prev) => ({ ...prev, - [endpoint]: result.response_id - })) + [endpoint]: result.response_id, + })); } - } + }; const handleFileUploadComplete = () => { - console.log("Chat page received file upload complete event") - setLoading(false) - setIsUploading(false) - } + console.log("Chat page received file upload complete event"); + setLoading(false); + setIsUploading(false); + }; const handleFileUploadError = (event: CustomEvent) => { - const { filename, error } = event.detail - console.log("Chat page received file upload error event:", filename, error) - + const { filename, error } = event.detail; + console.log( + "Chat page received file upload error event:", + filename, + error + ); + // Replace the last message with error message const errorMessage: Message = { role: "assistant", content: `❌ Upload failed for **${filename}**: ${error}`, - timestamp: new Date() - } - setMessages(prev => [...prev.slice(0, -1), errorMessage]) - } + timestamp: new Date(), + }; + setMessages((prev) => [...prev.slice(0, -1), errorMessage]); + }; + + window.addEventListener( + "fileUploadStart", + handleFileUploadStart as EventListener + ); + window.addEventListener( + "fileUploaded", + handleFileUploaded as EventListener + ); + window.addEventListener( + "fileUploadComplete", + handleFileUploadComplete as EventListener + ); + window.addEventListener( + "fileUploadError", + handleFileUploadError as EventListener + ); - window.addEventListener('fileUploadStart', handleFileUploadStart as EventListener) - window.addEventListener('fileUploaded', handleFileUploaded as EventListener) - window.addEventListener('fileUploadComplete', handleFileUploadComplete as EventListener) - window.addEventListener('fileUploadError', handleFileUploadError as EventListener) - return () => { - window.removeEventListener('fileUploadStart', handleFileUploadStart as EventListener) - window.removeEventListener('fileUploaded', handleFileUploaded as EventListener) - window.removeEventListener('fileUploadComplete', handleFileUploadComplete as EventListener) - window.removeEventListener('fileUploadError', handleFileUploadError as EventListener) - } - }, [endpoint, setPreviousResponseIds]) + window.removeEventListener( + "fileUploadStart", + handleFileUploadStart as EventListener + ); + window.removeEventListener( + "fileUploaded", + handleFileUploaded as EventListener + ); + window.removeEventListener( + "fileUploadComplete", + handleFileUploadComplete as EventListener + ); + window.removeEventListener( + "fileUploadError", + handleFileUploadError as EventListener + ); + }; + }, [endpoint, setPreviousResponseIds]); // Handle click outside to close dropdown useEffect(() => { const handleClickOutside = (event: MouseEvent) => { - if (isFilterDropdownOpen && - dropdownRef.current && - !dropdownRef.current.contains(event.target as Node) && - !inputRef.current?.contains(event.target as Node)) { - setIsFilterDropdownOpen(false) - setFilterSearchTerm("") - setSelectedFilterIndex(0) + if ( + isFilterDropdownOpen && + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) && + !inputRef.current?.contains(event.target as Node) + ) { + setIsFilterDropdownOpen(false); + setFilterSearchTerm(""); + setSelectedFilterIndex(0); } - } + }; - document.addEventListener('mousedown', handleClickOutside) + document.addEventListener("mousedown", handleClickOutside); return () => { - document.removeEventListener('mousedown', handleClickOutside) - } - }, [isFilterDropdownOpen]) - + 
document.removeEventListener("mousedown", handleClickOutside); + }; + }, [isFilterDropdownOpen]); const handleSSEStream = async (userMessage: Message) => { - const apiEndpoint = endpoint === "chat" ? "/api/chat" : "/api/langflow" - + const apiEndpoint = endpoint === "chat" ? "/api/chat" : "/api/langflow"; + try { // Abort any existing stream before starting a new one if (streamAbortRef.current) { - streamAbortRef.current.abort() + streamAbortRef.current.abort(); } - const controller = new AbortController() - streamAbortRef.current = controller - const thisStreamId = ++streamIdRef.current + const controller = new AbortController(); + streamAbortRef.current = controller; + const thisStreamId = ++streamIdRef.current; const requestBody: RequestBody = { prompt: userMessage.content, stream: true, - ...(parsedFilterData?.filters && (() => { - const filters = parsedFilterData.filters - const processed: SelectedFilters = { - data_sources: [], - document_types: [], - owners: [] - } - // Only copy non-wildcard arrays - processed.data_sources = filters.data_sources.includes("*") ? [] : filters.data_sources - processed.document_types = filters.document_types.includes("*") ? [] : filters.document_types - processed.owners = filters.owners.includes("*") ? [] : filters.owners - - // Only include filters if any array has values - const hasFilters = processed.data_sources.length > 0 || - processed.document_types.length > 0 || - processed.owners.length > 0 - return hasFilters ? { filters: processed } : {} - })()), + ...(parsedFilterData?.filters && + (() => { + const filters = parsedFilterData.filters; + const processed: SelectedFilters = { + data_sources: [], + document_types: [], + owners: [], + }; + // Only copy non-wildcard arrays + processed.data_sources = filters.data_sources.includes("*") + ? [] + : filters.data_sources; + processed.document_types = filters.document_types.includes("*") + ? [] + : filters.document_types; + processed.owners = filters.owners.includes("*") + ? [] + : filters.owners; + + // Only include filters if any array has values + const hasFilters = + processed.data_sources.length > 0 || + processed.document_types.length > 0 || + processed.owners.length > 0; + return hasFilters ? { filters: processed } : {}; + })()), limit: parsedFilterData?.limit ?? 10, - scoreThreshold: parsedFilterData?.scoreThreshold ?? 0 - } - + scoreThreshold: parsedFilterData?.scoreThreshold ?? 0, + }; + // Add previous_response_id if we have one for this endpoint - const currentResponseId = previousResponseIds[endpoint] + const currentResponseId = previousResponseIds[endpoint]; if (currentResponseId) { - requestBody.previous_response_id = currentResponseId + requestBody.previous_response_id = currentResponseId; } - + const response = await fetch(apiEndpoint, { method: "POST", headers: { @@ -579,138 +674,183 @@ function ChatPage() { }, body: JSON.stringify(requestBody), signal: controller.signal, - }) + }); if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`) + throw new Error(`HTTP error! 
status: ${response.status}`); } - const reader = response.body?.getReader() + const reader = response.body?.getReader(); if (!reader) { - throw new Error("No reader available") + throw new Error("No reader available"); } - const decoder = new TextDecoder() - let buffer = "" - let currentContent = "" - const currentFunctionCalls: FunctionCall[] = [] - let newResponseId: string | null = null - + const decoder = new TextDecoder(); + let buffer = ""; + let currentContent = ""; + const currentFunctionCalls: FunctionCall[] = []; + let newResponseId: string | null = null; + // Initialize streaming message if (!controller.signal.aborted && thisStreamId === streamIdRef.current) { setStreamingMessage({ content: "", functionCalls: [], - timestamp: new Date() - }) + timestamp: new Date(), + }); } try { while (true) { - const { done, value } = await reader.read() - if (controller.signal.aborted || thisStreamId !== streamIdRef.current) break - if (done) break - buffer += decoder.decode(value, { stream: true }) - + const { done, value } = await reader.read(); + if (controller.signal.aborted || thisStreamId !== streamIdRef.current) + break; + if (done) break; + buffer += decoder.decode(value, { stream: true }); + // Process complete lines (JSON objects) - const lines = buffer.split('\n') - buffer = lines.pop() || "" // Keep incomplete line in buffer - + const lines = buffer.split("\n"); + buffer = lines.pop() || ""; // Keep incomplete line in buffer + for (const line of lines) { if (line.trim()) { try { - const chunk = JSON.parse(line) - console.log("Received chunk:", chunk.type || chunk.object, chunk) - + const chunk = JSON.parse(line); + console.log( + "Received chunk:", + chunk.type || chunk.object, + chunk + ); + // Extract response ID if present if (chunk.id) { - newResponseId = chunk.id + newResponseId = chunk.id; } else if (chunk.response_id) { - newResponseId = chunk.response_id + newResponseId = chunk.response_id; } - + // Handle OpenAI Chat Completions streaming format if (chunk.object === "response.chunk" && chunk.delta) { // Handle function calls in delta if (chunk.delta.function_call) { - console.log("Function call in delta:", chunk.delta.function_call) - + console.log( + "Function call in delta:", + chunk.delta.function_call + ); + // Check if this is a new function call if (chunk.delta.function_call.name) { - console.log("New function call:", chunk.delta.function_call.name) + console.log( + "New function call:", + chunk.delta.function_call.name + ); const functionCall: FunctionCall = { name: chunk.delta.function_call.name, arguments: undefined, status: "pending", - argumentsString: chunk.delta.function_call.arguments || "" - } - currentFunctionCalls.push(functionCall) - console.log("Added function call:", functionCall) + argumentsString: + chunk.delta.function_call.arguments || "", + }; + currentFunctionCalls.push(functionCall); + console.log("Added function call:", functionCall); } // Or if this is arguments continuation else if (chunk.delta.function_call.arguments) { - console.log("Function call arguments delta:", chunk.delta.function_call.arguments) - const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1] + console.log( + "Function call arguments delta:", + chunk.delta.function_call.arguments + ); + const lastFunctionCall = + currentFunctionCalls[currentFunctionCalls.length - 1]; if (lastFunctionCall) { if (!lastFunctionCall.argumentsString) { - lastFunctionCall.argumentsString = "" + lastFunctionCall.argumentsString = ""; } - lastFunctionCall.argumentsString += 
chunk.delta.function_call.arguments - console.log("Accumulated arguments:", lastFunctionCall.argumentsString) - + lastFunctionCall.argumentsString += + chunk.delta.function_call.arguments; + console.log( + "Accumulated arguments:", + lastFunctionCall.argumentsString + ); + // Try to parse arguments if they look complete if (lastFunctionCall.argumentsString.includes("}")) { try { - const parsed = JSON.parse(lastFunctionCall.argumentsString) - lastFunctionCall.arguments = parsed - lastFunctionCall.status = "completed" - console.log("Parsed function arguments:", parsed) + const parsed = JSON.parse( + lastFunctionCall.argumentsString + ); + lastFunctionCall.arguments = parsed; + lastFunctionCall.status = "completed"; + console.log("Parsed function arguments:", parsed); } catch (e) { - console.log("Arguments not yet complete or invalid JSON:", e) + console.log( + "Arguments not yet complete or invalid JSON:", + e + ); } } } } } - - // Handle tool calls in delta - else if (chunk.delta.tool_calls && Array.isArray(chunk.delta.tool_calls)) { - console.log("Tool calls in delta:", chunk.delta.tool_calls) - + + // Handle tool calls in delta + else if ( + chunk.delta.tool_calls && + Array.isArray(chunk.delta.tool_calls) + ) { + console.log("Tool calls in delta:", chunk.delta.tool_calls); + for (const toolCall of chunk.delta.tool_calls) { if (toolCall.function) { // Check if this is a new tool call if (toolCall.function.name) { - console.log("New tool call:", toolCall.function.name) + console.log("New tool call:", toolCall.function.name); const functionCall: FunctionCall = { name: toolCall.function.name, arguments: undefined, status: "pending", - argumentsString: toolCall.function.arguments || "" - } - currentFunctionCalls.push(functionCall) - console.log("Added tool call:", functionCall) + argumentsString: toolCall.function.arguments || "", + }; + currentFunctionCalls.push(functionCall); + console.log("Added tool call:", functionCall); } // Or if this is arguments continuation else if (toolCall.function.arguments) { - console.log("Tool call arguments delta:", toolCall.function.arguments) - const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1] + console.log( + "Tool call arguments delta:", + toolCall.function.arguments + ); + const lastFunctionCall = + currentFunctionCalls[ + currentFunctionCalls.length - 1 + ]; if (lastFunctionCall) { if (!lastFunctionCall.argumentsString) { - lastFunctionCall.argumentsString = "" + lastFunctionCall.argumentsString = ""; } - lastFunctionCall.argumentsString += toolCall.function.arguments - console.log("Accumulated tool arguments:", lastFunctionCall.argumentsString) - + lastFunctionCall.argumentsString += + toolCall.function.arguments; + console.log( + "Accumulated tool arguments:", + lastFunctionCall.argumentsString + ); + // Try to parse arguments if they look complete - if (lastFunctionCall.argumentsString.includes("}")) { + if ( + lastFunctionCall.argumentsString.includes("}") + ) { try { - const parsed = JSON.parse(lastFunctionCall.argumentsString) - lastFunctionCall.arguments = parsed - lastFunctionCall.status = "completed" - console.log("Parsed tool arguments:", parsed) + const parsed = JSON.parse( + lastFunctionCall.argumentsString + ); + lastFunctionCall.arguments = parsed; + lastFunctionCall.status = "completed"; + console.log("Parsed tool arguments:", parsed); } catch (e) { - console.log("Tool arguments not yet complete or invalid JSON:", e) + console.log( + "Tool arguments not yet complete or invalid JSON:", + e + ); } } } @@ 
-718,256 +858,403 @@ function ChatPage() { } } } - + // Handle content/text in delta else if (chunk.delta.content) { - console.log("Content delta:", chunk.delta.content) - currentContent += chunk.delta.content + console.log("Content delta:", chunk.delta.content); + currentContent += chunk.delta.content; } - + // Handle finish reason if (chunk.delta.finish_reason) { - console.log("Finish reason:", chunk.delta.finish_reason) + console.log("Finish reason:", chunk.delta.finish_reason); // Mark any pending function calls as completed - currentFunctionCalls.forEach(fc => { + currentFunctionCalls.forEach((fc) => { if (fc.status === "pending" && fc.argumentsString) { try { - fc.arguments = JSON.parse(fc.argumentsString) - fc.status = "completed" - console.log("Completed function call on finish:", fc) + fc.arguments = JSON.parse(fc.argumentsString); + fc.status = "completed"; + console.log("Completed function call on finish:", fc); } catch (e) { - fc.arguments = { raw: fc.argumentsString } - fc.status = "error" - console.log("Error parsing function call on finish:", fc, e) + fc.arguments = { raw: fc.argumentsString }; + fc.status = "error"; + console.log( + "Error parsing function call on finish:", + fc, + e + ); } } - }) + }); } } - + // Handle Realtime API format (this is what you're actually getting!) - else if (chunk.type === "response.output_item.added" && chunk.item?.type === "function_call") { - console.log("🟢 CREATING function call (added):", chunk.item.id, chunk.item.tool_name || chunk.item.name) - + else if ( + chunk.type === "response.output_item.added" && + chunk.item?.type === "function_call" + ) { + console.log( + "🟢 CREATING function call (added):", + chunk.item.id, + chunk.item.tool_name || chunk.item.name + ); + // Try to find an existing pending call to update (created by earlier deltas) - let existing = currentFunctionCalls.find(fc => fc.id === chunk.item.id) + let existing = currentFunctionCalls.find( + (fc) => fc.id === chunk.item.id + ); if (!existing) { - existing = [...currentFunctionCalls].reverse().find(fc => - fc.status === "pending" && - !fc.id && - (fc.name === (chunk.item.tool_name || chunk.item.name)) - ) + existing = [...currentFunctionCalls] + .reverse() + .find( + (fc) => + fc.status === "pending" && + !fc.id && + fc.name === (chunk.item.tool_name || chunk.item.name) + ); } - + if (existing) { - existing.id = chunk.item.id - existing.type = chunk.item.type - existing.name = chunk.item.tool_name || chunk.item.name || existing.name - existing.arguments = chunk.item.inputs || existing.arguments - console.log("🟢 UPDATED existing pending function call with id:", existing.id) + existing.id = chunk.item.id; + existing.type = chunk.item.type; + existing.name = + chunk.item.tool_name || chunk.item.name || existing.name; + existing.arguments = + chunk.item.inputs || existing.arguments; + console.log( + "🟢 UPDATED existing pending function call with id:", + existing.id + ); } else { const functionCall: FunctionCall = { - name: chunk.item.tool_name || chunk.item.name || "unknown", + name: + chunk.item.tool_name || chunk.item.name || "unknown", arguments: chunk.item.inputs || undefined, status: "pending", argumentsString: "", id: chunk.item.id, - type: chunk.item.type - } - currentFunctionCalls.push(functionCall) - console.log("🟢 Function calls now:", currentFunctionCalls.map(fc => ({ id: fc.id, name: fc.name }))) + type: chunk.item.type, + }; + currentFunctionCalls.push(functionCall); + console.log( + "🟢 Function calls now:", + currentFunctionCalls.map((fc) => ({ + id: fc.id, 
+ name: fc.name, + })) + ); } } - + // Handle function call arguments streaming (Realtime API) - else if (chunk.type === "response.function_call_arguments.delta") { - console.log("Function args delta (Realtime API):", chunk.delta) - const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1] + else if ( + chunk.type === "response.function_call_arguments.delta" + ) { + console.log( + "Function args delta (Realtime API):", + chunk.delta + ); + const lastFunctionCall = + currentFunctionCalls[currentFunctionCalls.length - 1]; if (lastFunctionCall) { if (!lastFunctionCall.argumentsString) { - lastFunctionCall.argumentsString = "" + lastFunctionCall.argumentsString = ""; } - lastFunctionCall.argumentsString += chunk.delta || "" - console.log("Accumulated arguments (Realtime API):", lastFunctionCall.argumentsString) + lastFunctionCall.argumentsString += chunk.delta || ""; + console.log( + "Accumulated arguments (Realtime API):", + lastFunctionCall.argumentsString + ); } } - + // Handle function call arguments completion (Realtime API) - else if (chunk.type === "response.function_call_arguments.done") { - console.log("Function args done (Realtime API):", chunk.arguments) - const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1] + else if ( + chunk.type === "response.function_call_arguments.done" + ) { + console.log( + "Function args done (Realtime API):", + chunk.arguments + ); + const lastFunctionCall = + currentFunctionCalls[currentFunctionCalls.length - 1]; if (lastFunctionCall) { try { - lastFunctionCall.arguments = JSON.parse(chunk.arguments || "{}") - lastFunctionCall.status = "completed" - console.log("Parsed function arguments (Realtime API):", lastFunctionCall.arguments) + lastFunctionCall.arguments = JSON.parse( + chunk.arguments || "{}" + ); + lastFunctionCall.status = "completed"; + console.log( + "Parsed function arguments (Realtime API):", + lastFunctionCall.arguments + ); } catch (e) { - lastFunctionCall.arguments = { raw: chunk.arguments } - lastFunctionCall.status = "error" - console.log("Error parsing function arguments (Realtime API):", e) + lastFunctionCall.arguments = { raw: chunk.arguments }; + lastFunctionCall.status = "error"; + console.log( + "Error parsing function arguments (Realtime API):", + e + ); } } } - + // Handle function call completion (Realtime API) - else if (chunk.type === "response.output_item.done" && chunk.item?.type === "function_call") { - console.log("🔵 UPDATING function call (done):", chunk.item.id, chunk.item.tool_name || chunk.item.name) - console.log("🔵 Looking for existing function calls:", currentFunctionCalls.map(fc => ({ id: fc.id, name: fc.name }))) - + else if ( + chunk.type === "response.output_item.done" && + chunk.item?.type === "function_call" + ) { + console.log( + "🔵 UPDATING function call (done):", + chunk.item.id, + chunk.item.tool_name || chunk.item.name + ); + console.log( + "🔵 Looking for existing function calls:", + currentFunctionCalls.map((fc) => ({ + id: fc.id, + name: fc.name, + })) + ); + // Find existing function call by ID or name - const functionCall = currentFunctionCalls.find(fc => - fc.id === chunk.item.id || - fc.name === chunk.item.tool_name || - fc.name === chunk.item.name - ) - + const functionCall = currentFunctionCalls.find( + (fc) => + fc.id === chunk.item.id || + fc.name === chunk.item.tool_name || + fc.name === chunk.item.name + ); + if (functionCall) { - console.log("🔵 FOUND existing function call, updating:", functionCall.id, functionCall.name) + console.log( + "🔵 
FOUND existing function call, updating:", + functionCall.id, + functionCall.name + ); // Update existing function call with completion data - functionCall.status = chunk.item.status === "completed" ? "completed" : "error" - functionCall.id = chunk.item.id - functionCall.type = chunk.item.type - functionCall.name = chunk.item.tool_name || chunk.item.name || functionCall.name - functionCall.arguments = chunk.item.inputs || functionCall.arguments - + functionCall.status = + chunk.item.status === "completed" ? "completed" : "error"; + functionCall.id = chunk.item.id; + functionCall.type = chunk.item.type; + functionCall.name = + chunk.item.tool_name || + chunk.item.name || + functionCall.name; + functionCall.arguments = + chunk.item.inputs || functionCall.arguments; + // Set results if present if (chunk.item.results) { - functionCall.result = chunk.item.results + functionCall.result = chunk.item.results; } } else { - console.log("🔴 WARNING: Could not find existing function call to update:", chunk.item.id, chunk.item.tool_name, chunk.item.name) + console.log( + "🔴 WARNING: Could not find existing function call to update:", + chunk.item.id, + chunk.item.tool_name, + chunk.item.name + ); } } - + // Handle tool call completion with results - else if (chunk.type === "response.output_item.done" && chunk.item?.type?.includes("_call") && chunk.item?.type !== "function_call") { - console.log("Tool call done with results:", chunk.item) - + else if ( + chunk.type === "response.output_item.done" && + chunk.item?.type?.includes("_call") && + chunk.item?.type !== "function_call" + ) { + console.log("Tool call done with results:", chunk.item); + // Find existing function call by ID, or by name/type if ID not available - const functionCall = currentFunctionCalls.find(fc => - fc.id === chunk.item.id || - (fc.name === chunk.item.tool_name) || - (fc.name === chunk.item.name) || - (fc.name === chunk.item.type) || - (fc.name.includes(chunk.item.type.replace('_call', '')) || chunk.item.type.includes(fc.name)) - ) - + const functionCall = currentFunctionCalls.find( + (fc) => + fc.id === chunk.item.id || + fc.name === chunk.item.tool_name || + fc.name === chunk.item.name || + fc.name === chunk.item.type || + fc.name.includes(chunk.item.type.replace("_call", "")) || + chunk.item.type.includes(fc.name) + ); + if (functionCall) { // Update existing function call - functionCall.arguments = chunk.item.inputs || functionCall.arguments - functionCall.status = chunk.item.status === "completed" ? "completed" : "error" - functionCall.id = chunk.item.id - functionCall.type = chunk.item.type - + functionCall.arguments = + chunk.item.inputs || functionCall.arguments; + functionCall.status = + chunk.item.status === "completed" ? 
"completed" : "error"; + functionCall.id = chunk.item.id; + functionCall.type = chunk.item.type; + // Set the results if (chunk.item.results) { - functionCall.result = chunk.item.results + functionCall.result = chunk.item.results; } } else { // Create new function call if not found const newFunctionCall = { - name: chunk.item.tool_name || chunk.item.name || chunk.item.type || "unknown", + name: + chunk.item.tool_name || + chunk.item.name || + chunk.item.type || + "unknown", arguments: chunk.item.inputs || {}, status: "completed" as const, id: chunk.item.id, type: chunk.item.type, - result: chunk.item.results - } - currentFunctionCalls.push(newFunctionCall) + result: chunk.item.results, + }; + currentFunctionCalls.push(newFunctionCall); } } - + // Handle function call output item added (new format) - else if (chunk.type === "response.output_item.added" && chunk.item?.type?.includes("_call") && chunk.item?.type !== "function_call") { - console.log("🟡 CREATING tool call (added):", chunk.item.id, chunk.item.tool_name || chunk.item.name, chunk.item.type) - + else if ( + chunk.type === "response.output_item.added" && + chunk.item?.type?.includes("_call") && + chunk.item?.type !== "function_call" + ) { + console.log( + "🟡 CREATING tool call (added):", + chunk.item.id, + chunk.item.tool_name || chunk.item.name, + chunk.item.type + ); + // Dedupe by id or pending with same name - let existing = currentFunctionCalls.find(fc => fc.id === chunk.item.id) + let existing = currentFunctionCalls.find( + (fc) => fc.id === chunk.item.id + ); if (!existing) { - existing = [...currentFunctionCalls].reverse().find(fc => - fc.status === "pending" && - !fc.id && - (fc.name === (chunk.item.tool_name || chunk.item.name || chunk.item.type)) - ) + existing = [...currentFunctionCalls] + .reverse() + .find( + (fc) => + fc.status === "pending" && + !fc.id && + fc.name === + (chunk.item.tool_name || + chunk.item.name || + chunk.item.type) + ); } - + if (existing) { - existing.id = chunk.item.id - existing.type = chunk.item.type - existing.name = chunk.item.tool_name || chunk.item.name || chunk.item.type || existing.name - existing.arguments = chunk.item.inputs || existing.arguments - console.log("🟡 UPDATED existing pending tool call with id:", existing.id) + existing.id = chunk.item.id; + existing.type = chunk.item.type; + existing.name = + chunk.item.tool_name || + chunk.item.name || + chunk.item.type || + existing.name; + existing.arguments = + chunk.item.inputs || existing.arguments; + console.log( + "🟡 UPDATED existing pending tool call with id:", + existing.id + ); } else { const functionCall = { - name: chunk.item.tool_name || chunk.item.name || chunk.item.type || "unknown", + name: + chunk.item.tool_name || + chunk.item.name || + chunk.item.type || + "unknown", arguments: chunk.item.inputs || {}, status: "pending" as const, id: chunk.item.id, - type: chunk.item.type - } - currentFunctionCalls.push(functionCall) - console.log("🟡 Function calls now:", currentFunctionCalls.map(fc => ({ id: fc.id, name: fc.name, type: fc.type }))) + type: chunk.item.type, + }; + currentFunctionCalls.push(functionCall); + console.log( + "🟡 Function calls now:", + currentFunctionCalls.map((fc) => ({ + id: fc.id, + name: fc.name, + type: fc.type, + })) + ); } } - + // Handle function call results - else if (chunk.type === "response.function_call.result" || chunk.type === "function_call_result") { - console.log("Function call result:", chunk.result || chunk) - const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1] 
+ else if ( + chunk.type === "response.function_call.result" || + chunk.type === "function_call_result" + ) { + console.log("Function call result:", chunk.result || chunk); + const lastFunctionCall = + currentFunctionCalls[currentFunctionCalls.length - 1]; if (lastFunctionCall) { - lastFunctionCall.result = chunk.result || chunk.output || chunk.response - lastFunctionCall.status = "completed" + lastFunctionCall.result = + chunk.result || chunk.output || chunk.response; + lastFunctionCall.status = "completed"; } } - - // Handle tool call results - else if (chunk.type === "response.tool_call.result" || chunk.type === "tool_call_result") { - console.log("Tool call result:", chunk.result || chunk) - const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1] + + // Handle tool call results + else if ( + chunk.type === "response.tool_call.result" || + chunk.type === "tool_call_result" + ) { + console.log("Tool call result:", chunk.result || chunk); + const lastFunctionCall = + currentFunctionCalls[currentFunctionCalls.length - 1]; if (lastFunctionCall) { - lastFunctionCall.result = chunk.result || chunk.output || chunk.response - lastFunctionCall.status = "completed" + lastFunctionCall.result = + chunk.result || chunk.output || chunk.response; + lastFunctionCall.status = "completed"; } } - + // Handle generic results that might be in different formats - else if ((chunk.type && chunk.type.includes("result")) || chunk.result) { - console.log("Generic result:", chunk) - const lastFunctionCall = currentFunctionCalls[currentFunctionCalls.length - 1] + else if ( + (chunk.type && chunk.type.includes("result")) || + chunk.result + ) { + console.log("Generic result:", chunk); + const lastFunctionCall = + currentFunctionCalls[currentFunctionCalls.length - 1]; if (lastFunctionCall && !lastFunctionCall.result) { - lastFunctionCall.result = chunk.result || chunk.output || chunk.response || chunk - lastFunctionCall.status = "completed" + lastFunctionCall.result = + chunk.result || chunk.output || chunk.response || chunk; + lastFunctionCall.status = "completed"; } } - + // Handle text output streaming (Realtime API) else if (chunk.type === "response.output_text.delta") { - console.log("Text delta (Realtime API):", chunk.delta) - currentContent += chunk.delta || "" + console.log("Text delta (Realtime API):", chunk.delta); + currentContent += chunk.delta || ""; } - + // Log unhandled chunks - else if (chunk.type !== null && chunk.object !== "response.chunk") { - console.log("Unhandled chunk format:", chunk) + else if ( + chunk.type !== null && + chunk.object !== "response.chunk" + ) { + console.log("Unhandled chunk format:", chunk); } - + // Update streaming message - if (!controller.signal.aborted && thisStreamId === streamIdRef.current) { + if ( + !controller.signal.aborted && + thisStreamId === streamIdRef.current + ) { setStreamingMessage({ content: currentContent, functionCalls: [...currentFunctionCalls], - timestamp: new Date() - }) + timestamp: new Date(), + }); } - } catch (parseError) { - console.warn("Failed to parse chunk:", line, parseError) + console.warn("Failed to parse chunk:", line, parseError); } } } } } finally { - reader.releaseLock() + reader.releaseLock(); } // Finalize the message @@ -975,242 +1262,269 @@ function ChatPage() { role: "assistant", content: currentContent, functionCalls: currentFunctionCalls, - timestamp: new Date() - } - + timestamp: new Date(), + }; + if (!controller.signal.aborted && thisStreamId === streamIdRef.current) { - setMessages(prev => 
[...prev, finalMessage]) - setStreamingMessage(null) + setMessages((prev) => [...prev, finalMessage]); + setStreamingMessage(null); } - + // Store the response ID for the next request for this endpoint - if (newResponseId && !controller.signal.aborted && thisStreamId === streamIdRef.current) { - setPreviousResponseIds(prev => ({ + if ( + newResponseId && + !controller.signal.aborted && + thisStreamId === streamIdRef.current + ) { + setPreviousResponseIds((prev) => ({ ...prev, - [endpoint]: newResponseId - })) + [endpoint]: newResponseId, + })); } - + // Trigger sidebar refresh to include this conversation (with small delay to ensure backend has processed) setTimeout(() => { - try { refreshConversations() } catch {} - }, 100) - + try { + refreshConversations(); + } catch {} + }, 100); } catch (error) { // If stream was aborted (e.g., starting new conversation), do not append errors or final messages if (streamAbortRef.current?.signal.aborted) { - return + return; } - console.error("SSE Stream error:", error) - setStreamingMessage(null) - + console.error("SSE Stream error:", error); + setStreamingMessage(null); + const errorMessage: Message = { role: "assistant", - content: "Sorry, I couldn't connect to the chat service. Please try again.", - timestamp: new Date() - } - setMessages(prev => [...prev, errorMessage]) + content: + "Sorry, I couldn't connect to the chat service. Please try again.", + timestamp: new Date(), + }; + setMessages((prev) => [...prev, errorMessage]); } - } - + }; const handleSubmit = async (e: React.FormEvent) => { - e.preventDefault() - if (!input.trim() || loading) return + e.preventDefault(); + if (!input.trim() || loading) return; const userMessage: Message = { role: "user", content: input.trim(), - timestamp: new Date() - } + timestamp: new Date(), + }; - setMessages(prev => [...prev, userMessage]) - setInput("") - setLoading(true) - setIsFilterHighlighted(false) + setMessages((prev) => [...prev, userMessage]); + setInput(""); + setLoading(true); + setIsFilterHighlighted(false); if (asyncMode) { - await handleSSEStream(userMessage) + await handleSSEStream(userMessage); } else { // Original non-streaming logic try { - const apiEndpoint = endpoint === "chat" ? "/api/chat" : "/api/langflow" - + const apiEndpoint = endpoint === "chat" ? "/api/chat" : "/api/langflow"; + const requestBody: RequestBody = { prompt: userMessage.content, - ...(parsedFilterData?.filters && (() => { - const filters = parsedFilterData.filters - const processed: SelectedFilters = { - data_sources: [], - document_types: [], - owners: [] - } - // Only copy non-wildcard arrays - processed.data_sources = filters.data_sources.includes("*") ? [] : filters.data_sources - processed.document_types = filters.document_types.includes("*") ? [] : filters.document_types - processed.owners = filters.owners.includes("*") ? [] : filters.owners - - // Only include filters if any array has values - const hasFilters = processed.data_sources.length > 0 || - processed.document_types.length > 0 || - processed.owners.length > 0 - return hasFilters ? { filters: processed } : {} - })()), + ...(parsedFilterData?.filters && + (() => { + const filters = parsedFilterData.filters; + const processed: SelectedFilters = { + data_sources: [], + document_types: [], + owners: [], + }; + // Only copy non-wildcard arrays + processed.data_sources = filters.data_sources.includes("*") + ? [] + : filters.data_sources; + processed.document_types = filters.document_types.includes("*") + ? 
[] + : filters.document_types; + processed.owners = filters.owners.includes("*") + ? [] + : filters.owners; + + // Only include filters if any array has values + const hasFilters = + processed.data_sources.length > 0 || + processed.document_types.length > 0 || + processed.owners.length > 0; + return hasFilters ? { filters: processed } : {}; + })()), limit: parsedFilterData?.limit ?? 10, - scoreThreshold: parsedFilterData?.scoreThreshold ?? 0 - } - + scoreThreshold: parsedFilterData?.scoreThreshold ?? 0, + }; + // Add previous_response_id if we have one for this endpoint - const currentResponseId = previousResponseIds[endpoint] + const currentResponseId = previousResponseIds[endpoint]; if (currentResponseId) { - requestBody.previous_response_id = currentResponseId + requestBody.previous_response_id = currentResponseId; } - + const response = await fetch(apiEndpoint, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(requestBody), - }) + }); + + const result = await response.json(); - const result = await response.json() - if (response.ok) { const assistantMessage: Message = { role: "assistant", content: result.response, - timestamp: new Date() - } - setMessages(prev => [...prev, assistantMessage]) - + timestamp: new Date(), + }; + setMessages((prev) => [...prev, assistantMessage]); + // Store the response ID if present for this endpoint if (result.response_id) { - setPreviousResponseIds(prev => ({ + setPreviousResponseIds((prev) => ({ ...prev, - [endpoint]: result.response_id - })) + [endpoint]: result.response_id, + })); } // Trigger sidebar refresh to include/update this conversation (with small delay to ensure backend has processed) setTimeout(() => { - try { refreshConversations() } catch {} - }, 100) + try { + refreshConversations(); + } catch {} + }, 100); } else { - console.error("Chat failed:", result.error) + console.error("Chat failed:", result.error); const errorMessage: Message = { role: "assistant", content: "Sorry, I encountered an error. Please try again.", - timestamp: new Date() - } - setMessages(prev => [...prev, errorMessage]) + timestamp: new Date(), + }; + setMessages((prev) => [...prev, errorMessage]); } } catch (error) { - console.error("Chat error:", error) + console.error("Chat error:", error); const errorMessage: Message = { role: "assistant", - content: "Sorry, I couldn't connect to the chat service. Please try again.", - timestamp: new Date() - } - setMessages(prev => [...prev, errorMessage]) + content: + "Sorry, I couldn't connect to the chat service. 
Please try again.", + timestamp: new Date(), + }; + setMessages((prev) => [...prev, errorMessage]); } } - - setLoading(false) - } + + setLoading(false); + }; const toggleFunctionCall = (functionCallId: string) => { - setExpandedFunctionCalls(prev => { - const newSet = new Set(prev) + setExpandedFunctionCalls((prev) => { + const newSet = new Set(prev); if (newSet.has(functionCallId)) { - newSet.delete(functionCallId) + newSet.delete(functionCallId); } else { - newSet.add(functionCallId) + newSet.add(functionCallId); } - return newSet - }) - } + return newSet; + }); + }; - const handleForkConversation = (messageIndex: number, event?: React.MouseEvent) => { + const handleForkConversation = ( + messageIndex: number, + event?: React.MouseEvent + ) => { // Prevent any default behavior and stop event propagation if (event) { - event.preventDefault() - event.stopPropagation() + event.preventDefault(); + event.stopPropagation(); } - + // Set interaction state to prevent auto-scroll interference - const forkTimestamp = Date.now() - setIsUserInteracting(true) - setIsForkingInProgress(true) - setLastForkTimestamp(forkTimestamp) - - console.log("Fork conversation called for message index:", messageIndex) - + const forkTimestamp = Date.now(); + setIsUserInteracting(true); + setIsForkingInProgress(true); + setLastForkTimestamp(forkTimestamp); + + console.log("Fork conversation called for message index:", messageIndex); + // Get messages up to and including the selected assistant message - const messagesToKeep = messages.slice(0, messageIndex + 1) - + const messagesToKeep = messages.slice(0, messageIndex + 1); + // The selected message should be an assistant message (since fork button is only on assistant messages) - const forkedMessage = messages[messageIndex] - if (forkedMessage.role !== 'assistant') { - console.error('Fork button should only be on assistant messages') - setIsUserInteracting(false) - setIsForkingInProgress(false) - setLastForkTimestamp(0) - return + const forkedMessage = messages[messageIndex]; + if (forkedMessage.role !== "assistant") { + console.error("Fork button should only be on assistant messages"); + setIsUserInteracting(false); + setIsForkingInProgress(false); + setLastForkTimestamp(0); + return; } - + // For forking, we want to continue from the response_id of the assistant message we're forking from // Since we don't store individual response_ids per message yet, we'll use the current conversation's response_id // This means we're continuing the conversation thread from that point - const responseIdToForkFrom = currentConversationId || previousResponseIds[endpoint] - + const responseIdToForkFrom = + currentConversationId || previousResponseIds[endpoint]; + // Create a new conversation by properly forking - setMessages(messagesToKeep) - + setMessages(messagesToKeep); + // Use the chat context's fork method which handles creating a new conversation properly if (forkFromResponse) { - forkFromResponse(responseIdToForkFrom || '') + forkFromResponse(responseIdToForkFrom || ""); } else { // Fallback to manual approach - setCurrentConversationId(null) // This creates a new conversation thread - + setCurrentConversationId(null); // This creates a new conversation thread + // Set the response_id we want to continue from as the previous response ID // This tells the backend to continue the conversation from this point - setPreviousResponseIds(prev => ({ + setPreviousResponseIds((prev) => ({ ...prev, - [endpoint]: responseIdToForkFrom - })) + [endpoint]: responseIdToForkFrom, + })); } - - 
console.log("Forked conversation with", messagesToKeep.length, "messages") - + + console.log("Forked conversation with", messagesToKeep.length, "messages"); + // Reset interaction state after a longer delay to ensure all effects complete setTimeout(() => { - setIsUserInteracting(false) - setIsForkingInProgress(false) - console.log("Fork interaction complete, re-enabling auto effects") - }, 500) - + setIsUserInteracting(false); + setIsForkingInProgress(false); + console.log("Fork interaction complete, re-enabling auto effects"); + }, 500); + // The original conversation remains unchanged in the sidebar // This new forked conversation will get its own response_id when the user sends the next message - } + }; + + const renderFunctionCalls = ( + functionCalls: FunctionCall[], + messageIndex?: number + ) => { + if (!functionCalls || functionCalls.length === 0) return null; - const renderFunctionCalls = (functionCalls: FunctionCall[], messageIndex?: number) => { - if (!functionCalls || functionCalls.length === 0) return null - return (
{functionCalls.map((fc, index) => { - const functionCallId = `${messageIndex || 'streaming'}-${index}` - const isExpanded = expandedFunctionCalls.has(functionCallId) - + const functionCallId = `${messageIndex || "streaming"}-${index}`; + const isExpanded = expandedFunctionCalls.has(functionCallId); + // Determine display name - show both name and type if available - const displayName = fc.type && fc.type !== fc.name - ? `${fc.name} (${fc.type})` - : fc.name - + const displayName = + fc.type && fc.type !== fc.name + ? `${fc.name} (${fc.type})` + : fc.name; + return ( -
-
+
toggleFunctionCall(functionCallId)} > @@ -1223,11 +1537,15 @@ function ChatPage() { {fc.id.substring(0, 8)}... )} -
+
{fc.status}
{isExpanded ? ( @@ -1236,7 +1554,7 @@ function ChatPage() { )}
- + {isExpanded && (
{/* Show type information if available */} @@ -1248,7 +1566,7 @@ function ChatPage() {
)} - + {/* Show ID if available */} {fc.id && (
@@ -1258,20 +1576,19 @@ function ChatPage() {
)} - + {/* Show arguments - either completed or streaming */} {(fc.arguments || fc.argumentsString) && (
Arguments:
-                        {fc.arguments 
+                        {fc.arguments
                           ? JSON.stringify(fc.arguments, null, 2)
-                          : fc.argumentsString || "..."
-                        }
+                          : fc.argumentsString || "..."}
                       
)} - + {fc.result && (
Result: @@ -1279,37 +1596,43 @@ function ChatPage() {
{(() => { // Handle different result formats - let resultsToRender = fc.result - + let resultsToRender = fc.result; + // Check if this is function_call format with nested results // Function call format: results = [{ results: [...] }] // Tool call format: results = [{ text_key: ..., data: {...} }] - if (fc.result.length > 0 && - fc.result[0]?.results && - Array.isArray(fc.result[0].results) && - !fc.result[0].text_key) { - resultsToRender = fc.result[0].results + if ( + fc.result.length > 0 && + fc.result[0]?.results && + Array.isArray(fc.result[0].results) && + !fc.result[0].text_key + ) { + resultsToRender = fc.result[0].results; } - + type ToolResultItem = { - text_key?: string - data?: { file_path?: string; text?: string } - filename?: string - page?: number - score?: number - source_url?: string | null - text?: string - } - const items = resultsToRender as unknown as ToolResultItem[] + text_key?: string; + data?: { file_path?: string; text?: string }; + filename?: string; + page?: number; + score?: number; + source_url?: string | null; + text?: string; + }; + const items = + resultsToRender as unknown as ToolResultItem[]; return items.map((result, idx: number) => ( -
+
{/* Handle tool_call format (file_path in data) */} {result.data?.file_path && (
📄 {result.data.file_path || "Unknown file"}
)} - + {/* Handle function_call format (filename directly) */} {result.filename && !result.data?.file_path && (
@@ -1322,63 +1645,74 @@ function ChatPage() { )}
)} - + {/* Handle tool_call text format */} {result.data?.text && (
- {result.data.text.length > 300 - ? result.data.text.substring(0, 300) + "..." - : result.data.text - } + {result.data.text.length > 300 + ? result.data.text.substring(0, 300) + + "..." + : result.data.text}
)} - + {/* Handle function_call text format */} {result.text && !result.data?.text && (
- {result.text.length > 300 - ? result.text.substring(0, 300) + "..." - : result.text - } + {result.text.length > 300 + ? result.text.substring(0, 300) + "..." + : result.text}
)} - + {/* Show additional metadata for function_call format */} {result.source_url && ( )} - + {result.text_key && (
Key: {result.text_key}
)}
- )) + )); })()}
- Found {(() => { - let resultsToCount = fc.result - if (fc.result.length > 0 && - fc.result[0]?.results && - Array.isArray(fc.result[0].results) && - !fc.result[0].text_key) { - resultsToCount = fc.result[0].results + Found{" "} + {(() => { + let resultsToCount = fc.result; + if ( + fc.result.length > 0 && + fc.result[0]?.results && + Array.isArray(fc.result[0].results) && + !fc.result[0].text_key + ) { + resultsToCount = fc.result[0].results; } - return resultsToCount.length - })()} result{(() => { - let resultsToCount = fc.result - if (fc.result.length > 0 && - fc.result[0]?.results && - Array.isArray(fc.result[0].results) && - !fc.result[0].text_key) { - resultsToCount = fc.result[0].results + return resultsToCount.length; + })()}{" "} + result + {(() => { + let resultsToCount = fc.result; + if ( + fc.result.length > 0 && + fc.result[0]?.results && + Array.isArray(fc.result[0].results) && + !fc.result[0].text_key + ) { + resultsToCount = fc.result[0].results; } - return resultsToCount.length !== 1 ? 's' : '' + return resultsToCount.length !== 1 ? "s" : ""; })()}
@@ -1392,35 +1726,39 @@ function ChatPage() {
)}
- ) + ); })}
- ) - } + ); + }; const suggestionChips = [ "Show me this quarter's top 10 deals", "Summarize recent client interactions", - "Search OpenSearch for mentions of our competitors" - ] + "Search OpenSearch for mentions of our competitors", + ]; const handleSuggestionClick = (suggestion: string) => { - setInput(suggestion) - inputRef.current?.focus() - } + setInput(suggestion); + inputRef.current?.focus(); + }; return ( -
+
{/* Debug header - only show in debug mode */} {isDebugMode && (
-
-
+
{/* Async Mode Toggle */}
@@ -1430,7 +1768,7 @@ function ChatPage() { onClick={() => setAsyncMode(false)} className="h-7 text-xs" > - Streaming Off + Streaming Off
))} - + {/* Streaming Message Display */} {streamingMessage && (
@@ -1547,7 +1907,10 @@ function ChatPage() {
- {renderFunctionCalls(streamingMessage.functionCalls, messages.length)} + {renderFunctionCalls( + streamingMessage.functionCalls, + messages.length + )}

{streamingMessage.content} @@ -1555,7 +1918,7 @@ function ChatPage() {

)} - + {/* Loading animation - shows immediately after user submits */} {loading && (
@@ -1565,7 +1928,9 @@ function ChatPage() {
- Thinking... + + Thinking... +
@@ -1573,21 +1938,22 @@ function ChatPage() {
)} - + {/* Drag overlay for existing messages */} {isDragOver && messages.length > 0 && (
-

Drop document to add context

+

+ Drop document to add context +

)}
-
- + {/* Suggestion chips - always show unless streaming */} {!streamingMessage && (
@@ -1608,7 +1974,7 @@ function ChatPage() {
)} - + {/* Input Area - Fixed at bottom */}
@@ -1616,17 +1982,19 @@ function ChatPage() {
{selectedFilter && (
- + @filter:{selectedFilter.name}
@@ -1786,7 +2178,10 @@ function ChatPage() { {isFilterDropdownOpen && ( -
+
{filterSearchTerm && (
@@ -1803,7 +2198,7 @@ function ChatPage() { )} {availableFilters - .filter(filter => - filter.name.toLowerCase().includes(filterSearchTerm.toLowerCase()) + .filter((filter) => + filter.name + .toLowerCase() + .includes(filterSearchTerm.toLowerCase()) ) .map((filter, index) => ( ))} - {availableFilters.filter(filter => - filter.name.toLowerCase().includes(filterSearchTerm.toLowerCase()) - ).length === 0 && filterSearchTerm && ( -
- No filters match "{filterSearchTerm}" -
- )} + {availableFilters.filter((filter) => + filter.name + .toLowerCase() + .includes(filterSearchTerm.toLowerCase()) + ).length === 0 && + filterSearchTerm && ( +
+ No filters match "{filterSearchTerm}" +
+ )} )}
@@ -1864,17 +2264,13 @@ function ChatPage() { disabled={!input.trim() || loading} className="absolute bottom-3 right-3 rounded-lg h-10 px-4" > - {loading ? ( - - ) : ( - "Send" - )} + {loading ? : "Send"}
- ) + ); } export default function ProtectedChatPage() { @@ -1882,5 +2278,5 @@ export default function ProtectedChatPage() { - ) -} + ); +} diff --git a/src/services/auth_service.py b/src/services/auth_service.py index 70c1d8b7..6cf94a40 100644 --- a/src/services/auth_service.py +++ b/src/services/auth_service.py @@ -14,6 +14,7 @@ from connectors.sharepoint.oauth import SharePointOAuth from connectors.google_drive import GoogleDriveConnector from connectors.onedrive import OneDriveConnector from connectors.sharepoint import SharePointConnector +from services.user_binding_service import user_binding_service class AuthService: def __init__(self, session_manager: SessionManager, connector_service=None): @@ -208,7 +209,20 @@ class AuthService: if jwt_token: # Get the user info to create a persistent Google Drive connection user_info = await self.session_manager.get_user_info_from_token(token_data["access_token"]) - user_id = user_info["id"] if user_info else None + google_user_id = user_info["id"] if user_info else None + + # Create or update user binding between Google ID and Langflow ID + if google_user_id and user_info: + try: + print(f"[DEBUG] Creating/updating user binding for Google ID: {google_user_id}") + binding_created = await user_binding_service.ensure_binding(google_user_id, user_info) + if binding_created: + print(f"[DEBUG] Successfully ensured user binding for Google ID: {google_user_id}") + else: + print(f"[DEBUG] Failed to create user binding for Google ID: {google_user_id}") + except Exception as e: + print(f"[WARNING] Failed to create user binding for Google ID {google_user_id}: {e}") + # Don't fail authentication if binding creation fails response_data = { "status": "authenticated", @@ -217,13 +231,13 @@ class AuthService: "jwt_token": jwt_token # Include JWT token in response } - if user_id: + if google_user_id: # Convert the temporary auth connection to a persistent Google Drive connection await self.connector_service.connection_manager.update_connection( connection_id=connection_id, connector_type="google_drive", name=f"Google Drive ({user_info.get('email', 'Unknown')})", - user_id=user_id, + user_id=google_user_id, config={ **connection_config.config, "purpose": "data_source", @@ -256,7 +270,11 @@ class AuthService: user = getattr(request.state, 'user', None) if user: - return { + # Get user binding info if available + binding_info = user_binding_service.get_binding_info(user.user_id) + langflow_user_id = user_binding_service.get_langflow_user_id(user.user_id) + + user_data = { "authenticated": True, "user": { "user_id": user.user_id, @@ -267,6 +285,15 @@ class AuthService: "last_login": user.last_login.isoformat() if user.last_login else None } } + + # Add binding information if available + if langflow_user_id: + user_data["user"]["langflow_user_id"] = langflow_user_id + if binding_info: + user_data["user"]["binding_created_at"] = binding_info.get("created_at") + user_data["user"]["binding_last_updated"] = binding_info.get("last_updated") + + return user_data else: return { "authenticated": False, diff --git a/src/services/chat_service.py b/src/services/chat_service.py index 689a626c..aea8839a 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -172,52 +172,105 @@ class ChatService: } async def get_langflow_history(self, user_id: str): - """Get langflow conversation history for a user""" + """Get langflow conversation history for a user - now fetches from both OpenRAG memory and Langflow database""" from agent import get_user_conversations + 
from services.langflow_history_service import langflow_history_service + from services.user_binding_service import user_binding_service if not user_id: return {"error": "User ID is required", "conversations": []} - conversations_dict = get_user_conversations(user_id) + all_conversations = [] - # Convert conversations dict to list format with metadata - conversations = [] - for response_id, conversation_state in conversations_dict.items(): - # Filter out system messages - messages = [] - for msg in conversation_state.get("messages", []): - if msg.get("role") in ["user", "assistant"]: - message_data = { - "role": msg["role"], - "content": msg["content"], - "timestamp": msg.get("timestamp").isoformat() if msg.get("timestamp") else None - } - if msg.get("response_id"): - message_data["response_id"] = msg["response_id"] - messages.append(message_data) + try: + # 1. Get in-memory OpenRAG conversations (current session) + conversations_dict = get_user_conversations(user_id) - if messages: # Only include conversations with actual messages - # Generate title from first user message - first_user_msg = next((msg for msg in messages if msg["role"] == "user"), None) - title = first_user_msg["content"][:50] + "..." if first_user_msg and len(first_user_msg["content"]) > 50 else first_user_msg["content"] if first_user_msg else "New chat" + for response_id, conversation_state in conversations_dict.items(): + # Filter out system messages + messages = [] + for msg in conversation_state.get("messages", []): + if msg.get("role") in ["user", "assistant"]: + message_data = { + "role": msg["role"], + "content": msg["content"], + "timestamp": msg.get("timestamp").isoformat() if msg.get("timestamp") else None + } + if msg.get("response_id"): + message_data["response_id"] = msg["response_id"] + messages.append(message_data) - conversations.append({ - "response_id": response_id, - "title": title, - "endpoint": "langflow", - "messages": messages, - "created_at": conversation_state.get("created_at").isoformat() if conversation_state.get("created_at") else None, - "last_activity": conversation_state.get("last_activity").isoformat() if conversation_state.get("last_activity") else None, - "previous_response_id": conversation_state.get("previous_response_id"), - "total_messages": len(messages) - }) + if messages: # Only include conversations with actual messages + first_user_msg = next((msg for msg in messages if msg["role"] == "user"), None) + title = first_user_msg["content"][:50] + "..." if first_user_msg and len(first_user_msg["content"]) > 50 else first_user_msg["content"] if first_user_msg else "New chat" + + all_conversations.append({ + "response_id": response_id, + "title": title, + "endpoint": "langflow", + "messages": messages, + "created_at": conversation_state.get("created_at").isoformat() if conversation_state.get("created_at") else None, + "last_activity": conversation_state.get("last_activity").isoformat() if conversation_state.get("last_activity") else None, + "previous_response_id": conversation_state.get("previous_response_id"), + "total_messages": len(messages), + "source": "openrag_memory" + }) + + # 2. 
Get historical conversations from Langflow database + # (works with both Google-bound users and direct Langflow users) + print(f"[DEBUG] Attempting to fetch Langflow history for user: {user_id}") + langflow_history = await langflow_history_service.get_user_conversation_history(user_id) + + if langflow_history.get("conversations"): + for conversation in langflow_history["conversations"]: + # Convert Langflow format to OpenRAG format + messages = [] + for msg in conversation.get("messages", []): + messages.append({ + "role": msg["role"], + "content": msg["content"], + "timestamp": msg.get("timestamp"), + "langflow_message_id": msg.get("langflow_message_id"), + "source": "langflow" + }) + + if messages: + first_user_msg = next((msg for msg in messages if msg["role"] == "user"), None) + title = first_user_msg["content"][:50] + "..." if first_user_msg and len(first_user_msg["content"]) > 50 else first_user_msg["content"] if first_user_msg else "Langflow chat" + + all_conversations.append({ + "response_id": conversation["session_id"], + "title": title, + "endpoint": "langflow", + "messages": messages, + "created_at": conversation.get("created_at"), + "last_activity": conversation.get("last_activity"), + "total_messages": len(messages), + "source": "langflow_database", + "langflow_session_id": conversation["session_id"], + "langflow_flow_id": conversation.get("flow_id") + }) + + print(f"[DEBUG] Added {len(langflow_history['conversations'])} historical conversations from Langflow") + elif langflow_history.get("error"): + print(f"[DEBUG] Could not fetch Langflow history for user {user_id}: {langflow_history['error']}") + else: + print(f"[DEBUG] No Langflow conversations found for user {user_id}") - # Sort by last activity (most recent first) - conversations.sort(key=lambda c: c["last_activity"], reverse=True) + except Exception as e: + print(f"[ERROR] Failed to fetch Langflow history: {e}") + # Continue with just in-memory conversations + + # Sort all conversations by last activity (most recent first) + all_conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True) return { "user_id": user_id, "endpoint": "langflow", - "conversations": conversations, - "total_conversations": len(conversations) + "conversations": all_conversations, + "total_conversations": len(all_conversations), + "sources": { + "memory": len([c for c in all_conversations if c.get("source") == "openrag_memory"]), + "langflow_db": len([c for c in all_conversations if c.get("source") == "langflow_database"]) + } } diff --git a/src/services/langflow_history_service.py b/src/services/langflow_history_service.py new file mode 100644 index 00000000..85f20b3f --- /dev/null +++ b/src/services/langflow_history_service.py @@ -0,0 +1,310 @@ +""" +Langflow Message History Service +Retrieves message history from Langflow's database using user bindings +""" + +import asyncio +import httpx +from typing import List, Dict, Optional, Any +from datetime import datetime + +from config.settings import LANGFLOW_URL, LANGFLOW_KEY, LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD +from services.user_binding_service import user_binding_service + + +class LangflowHistoryService: + """Service to retrieve message history from Langflow using user bindings""" + + def __init__(self): + self.langflow_url = LANGFLOW_URL + self.auth_token = None + + def _resolve_langflow_user_id(self, user_id: str) -> Optional[str]: + """Resolve user_id to Langflow user ID + + Args: + user_id: Either Google user ID or direct Langflow user ID + + Returns: + Langflow 
user ID or None + """ + # First, check if this is already a Langflow user ID by checking UUID format + if self._is_uuid_format(user_id): + print(f"User ID {user_id} appears to be a Langflow UUID, using directly") + return user_id + + # Otherwise, try to get Langflow user ID from Google binding + langflow_user_id = user_binding_service.get_langflow_user_id(user_id) + if langflow_user_id: + print(f"Found Langflow binding for Google user {user_id}: {langflow_user_id}") + return langflow_user_id + + print(f"No Langflow user ID found for {user_id}") + return None + + def _is_uuid_format(self, user_id: str) -> bool: + """Check if string looks like a UUID (Langflow user ID format)""" + import re + # Basic UUID pattern check (with or without dashes) + uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$' + return bool(re.match(uuid_pattern, user_id.lower().replace('-', ''))) + + async def _authenticate(self) -> Optional[str]: + """Authenticate with Langflow and get access token""" + if self.auth_token: + return self.auth_token + + if not all([LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD]): + print("Missing Langflow superuser credentials") + return None + + try: + login_data = { + "username": LANGFLOW_SUPERUSER, + "password": LANGFLOW_SUPERUSER_PASSWORD + } + + async with httpx.AsyncClient() as client: + response = await client.post( + f"{self.langflow_url.rstrip('/')}/api/v1/login", + data=login_data, + headers={"Content-Type": "application/x-www-form-urlencoded"} + ) + + if response.status_code == 200: + result = response.json() + self.auth_token = result.get('access_token') + print(f"Successfully authenticated with Langflow for history retrieval") + return self.auth_token + else: + print(f"Langflow authentication failed: {response.status_code}") + return None + + except Exception as e: + print(f"Error authenticating with Langflow: {e}") + return None + + async def get_user_sessions(self, user_id: str, flow_id: Optional[str] = None) -> List[str]: + """Get all session IDs for a user's conversations + + Args: + user_id: Either Google user ID or direct Langflow user ID + """ + # Determine the Langflow user ID + langflow_user_id = self._resolve_langflow_user_id(user_id) + if not langflow_user_id: + print(f"No Langflow user found for user: {user_id}") + return [] + + token = await self._authenticate() + if not token: + return [] + + try: + headers = {"Authorization": f"Bearer {token}"} + params = {} + + if flow_id: + params["flow_id"] = flow_id + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{self.langflow_url.rstrip('/')}/api/v1/monitor/messages/sessions", + headers=headers, + params=params + ) + + if response.status_code == 200: + session_ids = response.json() + + # Filter sessions to only include those belonging to the user + user_sessions = await self._filter_sessions_by_user(session_ids, langflow_user_id, token) + print(f"Found {len(user_sessions)} sessions for user {user_id} (Langflow ID: {langflow_user_id})") + return user_sessions + else: + print(f"Failed to get sessions: {response.status_code} - {response.text}") + return [] + + except Exception as e: + print(f"Error getting user sessions: {e}") + return [] + + async def _filter_sessions_by_user(self, session_ids: List[str], langflow_user_id: str, token: str) -> List[str]: + """Filter session IDs to only include those belonging to the specified user""" + user_sessions = [] + + try: + headers = {"Authorization": f"Bearer {token}"} + + async with httpx.AsyncClient() as client: + 
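# NOTE: this issues one monitor/messages request (and a flow lookup) per
+                # session, an N+1 pattern, so long histories can be slow here.
+                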
for session_id in session_ids: + # Get a sample message from this session to check flow ownership + response = await client.get( + f"{self.langflow_url.rstrip('/')}/api/v1/monitor/messages", + headers=headers, + params={ + "session_id": session_id, + "order_by": "timestamp" + } + ) + + if response.status_code == 200: + messages = response.json() + if messages and len(messages) > 0: + # Check if this session belongs to the user via flow ownership + flow_id = messages[0].get('flow_id') + if flow_id and await self._is_user_flow(flow_id, langflow_user_id, token): + user_sessions.append(session_id) + + except Exception as e: + print(f"Error filtering sessions by user: {e}") + + return user_sessions + + async def _is_user_flow(self, flow_id: str, langflow_user_id: str, token: str) -> bool: + """Check if a flow belongs to the specified user""" + try: + headers = {"Authorization": f"Bearer {token}"} + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{self.langflow_url.rstrip('/')}/api/v1/flows/{flow_id}", + headers=headers + ) + + if response.status_code == 200: + flow_data = response.json() + return flow_data.get('user_id') == langflow_user_id + + except Exception as e: + print(f"Error checking flow ownership: {e}") + + return False + + async def get_session_messages(self, user_id: str, session_id: str) -> List[Dict[str, Any]]: + """Get all messages for a specific session""" + # Verify user has access to this session + langflow_user_id = self._resolve_langflow_user_id(user_id) + if not langflow_user_id: + return [] + + token = await self._authenticate() + if not token: + return [] + + try: + headers = {"Authorization": f"Bearer {token}"} + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{self.langflow_url.rstrip('/')}/api/v1/monitor/messages", + headers=headers, + params={ + "session_id": session_id, + "order_by": "timestamp" + } + ) + + if response.status_code == 200: + messages = response.json() + + # Verify user owns this session (security check) + if messages and len(messages) > 0: + flow_id = messages[0].get('flow_id') + if not await self._is_user_flow(flow_id, langflow_user_id, token): + print(f"User {user_id} does not own session {session_id}") + return [] + + # Convert to OpenRAG format + return self._convert_langflow_messages(messages) + else: + print(f"Failed to get messages for session {session_id}: {response.status_code}") + return [] + + except Exception as e: + print(f"Error getting session messages: {e}") + return [] + + def _convert_langflow_messages(self, langflow_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert Langflow messages to OpenRAG format""" + converted_messages = [] + + for msg in langflow_messages: + try: + # Map Langflow message format to OpenRAG format + converted_msg = { + "role": "user" if msg.get("sender") == "User" else "assistant", + "content": msg.get("text", ""), + "timestamp": msg.get("timestamp"), + "langflow_message_id": msg.get("id"), + "langflow_session_id": msg.get("session_id"), + "langflow_flow_id": msg.get("flow_id"), + "sender": msg.get("sender"), + "sender_name": msg.get("sender_name"), + "files": msg.get("files", []), + "properties": msg.get("properties", {}), + "error": msg.get("error", False), + "edit": msg.get("edit", False) + } + converted_messages.append(converted_msg) + + except Exception as e: + print(f"Error converting message: {e}") + continue + + return converted_messages + + async def get_user_conversation_history(self, user_id: str, flow_id: Optional[str] 
= None) -> Dict[str, Any]: + """Get all conversation history for a user, organized by session""" + langflow_user_id = self._resolve_langflow_user_id(user_id) + if not langflow_user_id: + return { + "error": f"No Langflow user found for {user_id}", + "conversations": [] + } + + try: + # Get all user sessions + session_ids = await self.get_user_sessions(user_id, flow_id) + + conversations = [] + for session_id in session_ids: + messages = await self.get_session_messages(user_id, session_id) + if messages: + # Create conversation metadata + first_message = messages[0] if messages else None + last_message = messages[-1] if messages else None + + conversation = { + "session_id": session_id, + "langflow_session_id": session_id, # For compatibility + "response_id": session_id, # Map session_id to response_id for frontend compatibility + "messages": messages, + "message_count": len(messages), + "created_at": first_message.get("timestamp") if first_message else None, + "last_activity": last_message.get("timestamp") if last_message else None, + "flow_id": first_message.get("langflow_flow_id") if first_message else None, + "source": "langflow" + } + conversations.append(conversation) + + # Sort by last activity (most recent first) + conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True) + + return { + "conversations": conversations, + "total_conversations": len(conversations), + "langflow_user_id": langflow_user_id, + "user_id": user_id + } + + except Exception as e: + print(f"Error getting user conversation history: {e}") + return { + "error": str(e), + "conversations": [] + } + + +# Global instance +langflow_history_service = LangflowHistoryService() \ No newline at end of file diff --git a/src/services/user_binding_service.py b/src/services/user_binding_service.py new file mode 100644 index 00000000..4cead0aa --- /dev/null +++ b/src/services/user_binding_service.py @@ -0,0 +1,256 @@ +""" +User Binding Service +Manages mappings between Google OAuth user IDs and Langflow user IDs +Uses verified Langflow API endpoints: /api/v1/login and /api/v1/users/whoami +""" + +import json +import os +from typing import Dict, Optional, Any +import httpx +from config.settings import LANGFLOW_URL, LANGFLOW_KEY + +USER_BINDINGS_FILE = "user_bindings.json" + +class UserBindingService: + def __init__(self): + self.bindings_file = USER_BINDINGS_FILE + self.bindings = self._load_bindings() + + def _load_bindings(self) -> Dict[str, Any]: + """Load user bindings from JSON file""" + try: + if os.path.exists(self.bindings_file): + with open(self.bindings_file, 'r') as f: + return json.load(f) + else: + return {} + except Exception as e: + print(f"Error loading user bindings: {e}") + return {} + + def _save_bindings(self): + """Save user bindings to JSON file""" + try: + with open(self.bindings_file, 'w') as f: + json.dump(self.bindings, f, indent=2) + print(f"Saved user bindings to {self.bindings_file}") + except Exception as e: + print(f"Error saving user bindings: {e}") + + def get_langflow_user_id(self, google_user_id: str) -> Optional[str]: + """Get Langflow user ID from Google user ID""" + return self.bindings.get(google_user_id, {}).get('langflow_user_id') + + def get_google_user_id(self, langflow_user_id: str) -> Optional[str]: + """Get Google user ID from Langflow user ID (reverse lookup)""" + for google_id, binding in self.bindings.items(): + if binding.get('langflow_user_id') == langflow_user_id: + return google_id + return None + + def create_binding(self, google_user_id: str, 
langflow_user_id: str, google_user_info: Dict[str, Any]):
+        """Create a new binding between Google and Langflow user IDs"""
+        from datetime import datetime
+
+        now = datetime.now().isoformat()
+        self.bindings[google_user_id] = {
+            'langflow_user_id': langflow_user_id,
+            'google_user_info': {
+                'email': google_user_info.get('email'),
+                'name': google_user_info.get('name'),
+                'picture': google_user_info.get('picture'),
+                'verified_email': google_user_info.get('verified_email')
+            },
+            'created_at': now,
+            'last_updated': now
+        }
+        self._save_bindings()
+        print(f"Created binding: Google ID {google_user_id} -> Langflow ID {langflow_user_id}")
+
+    def update_binding(self, google_user_id: str, google_user_info: Dict[str, Any]):
+        """Update existing binding with fresh Google user info"""
+        from datetime import datetime
+
+        if google_user_id in self.bindings:
+            self.bindings[google_user_id]['google_user_info'] = {
+                'email': google_user_info.get('email'),
+                'name': google_user_info.get('name'),
+                'picture': google_user_info.get('picture'),
+                'verified_email': google_user_info.get('verified_email')
+            }
+            self.bindings[google_user_id]['last_updated'] = datetime.now().isoformat()
+            self._save_bindings()
+            print(f"Updated binding for Google ID {google_user_id}")
+
+    def has_binding(self, google_user_id: str) -> bool:
+        """Check if a binding exists for the Google user ID"""
+        return google_user_id in self.bindings
+
+    async def get_langflow_user_info(self, langflow_access_token: str) -> Optional[Dict[str, Any]]:
+        """Get current user info from Langflow's users/whoami endpoint"""
+        if not LANGFLOW_URL:
+            print("LANGFLOW_URL not configured")
+            return None
+
+        try:
+            # Use the correct Langflow endpoint based on source code analysis
+            endpoint = "/api/v1/users/whoami"
+
+            headers = {}
+            if langflow_access_token:
+                headers["Authorization"] = f"Bearer {langflow_access_token}"
+            elif LANGFLOW_KEY:
+                # Try with global Langflow API key if available
+                headers["Authorization"] = f"Bearer {LANGFLOW_KEY}"
+                headers["x-api-key"] = LANGFLOW_KEY
+
+            async with httpx.AsyncClient() as client:
+                url = f"{LANGFLOW_URL.rstrip('/')}{endpoint}"
+                print(f"Getting Langflow user info from: {url}")
+
+                response = await client.get(url, headers=headers)
+
+                if response.status_code == 200:
+                    user_data = response.json()
+                    print("Successfully got Langflow user data")
+                    return user_data
+                else:
+                    print(f"Langflow /whoami endpoint returned: {response.status_code} - {response.text}")
+                    return None
+
+        except Exception as e:
+            print(f"Error getting Langflow user info: {e}")
+            return None
+
+    async def authenticate_with_langflow(self) -> Optional[str]:
+        """Authenticate with Langflow using superuser credentials to get access token"""
+        if not LANGFLOW_URL:
+            return None
+
+        try:
+            from config.settings import LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD
+
+            if not LANGFLOW_SUPERUSER or not LANGFLOW_SUPERUSER_PASSWORD:
+                print("Langflow superuser credentials not configured")
+                return None
+
+            # Try to login to Langflow
+            login_data = {
+                "username": LANGFLOW_SUPERUSER,
+                "password": LANGFLOW_SUPERUSER_PASSWORD
+            }
+
+            async with httpx.AsyncClient() as client:
+                # Use the correct Langflow login endpoint based on source code analysis
+                endpoint = "/api/v1/login"
+                url = f"{LANGFLOW_URL.rstrip('/')}{endpoint}"
+
+                # Try form-encoded data first (standard OAuth2 flow)
+                try:
+                    response = await client.post(
+                        url,
+                        data=login_data,
+                        headers={"Content-Type": "application/x-www-form-urlencoded"}
+                    )
+
+                    if response.status_code == 200:
+                        
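# Langflow's login endpoint returns the bearer token as "access_token".
+                        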
result = response.json() + access_token = result.get('access_token') + if access_token: + print(f"Successfully authenticated with Langflow via {endpoint}") + return access_token + else: + print(f"Langflow login returned: {response.status_code} - {response.text}") + + except Exception as e: + print(f"Error with form login: {e}") + + # If form login didn't work, try JSON (fallback) + try: + response = await client.post( + url, + json=login_data, + headers={"Content-Type": "application/json"} + ) + + if response.status_code == 200: + result = response.json() + access_token = result.get('access_token') + if access_token: + print(f"Successfully authenticated with Langflow via {endpoint} (JSON)") + return access_token + else: + print(f"Langflow login (JSON) returned: {response.status_code} - {response.text}") + + except Exception as e: + print(f"Error with JSON login: {e}") + + print("Failed to authenticate with Langflow") + return None + + except Exception as e: + print(f"Error authenticating with Langflow: {e}") + return None + + async def ensure_binding(self, google_user_id: str, google_user_info: Dict[str, Any]) -> bool: + """Ensure a binding exists for the Google user, create if needed""" + if self.has_binding(google_user_id): + # Update existing binding with fresh Google info + self.update_binding(google_user_id, google_user_info) + return True + + # No binding exists, try to create one + try: + # First authenticate with Langflow + langflow_token = await self.authenticate_with_langflow() + if not langflow_token: + print("Could not authenticate with Langflow to create binding") + return False + + # Get Langflow user info + langflow_user_info = await self.get_langflow_user_info(langflow_token) + if not langflow_user_info: + print("Could not get Langflow user info") + return False + + # Extract Langflow user ID (try different possible fields) + langflow_user_id = None + for id_field in ['id', 'user_id', 'sub', 'username']: + if id_field in langflow_user_info: + langflow_user_id = str(langflow_user_info[id_field]) + break + + if not langflow_user_id: + print(f"Could not extract Langflow user ID from: {langflow_user_info}") + return False + + # Create the binding + self.create_binding(google_user_id, langflow_user_id, google_user_info) + return True + + except Exception as e: + print(f"Error creating binding for Google user {google_user_id}: {e}") + return False + + def get_binding_info(self, google_user_id: str) -> Optional[Dict[str, Any]]: + """Get complete binding information for a Google user ID""" + return self.bindings.get(google_user_id) + + def list_all_bindings(self) -> Dict[str, Any]: + """Get all user bindings (for admin purposes)""" + return self.bindings.copy() + + def is_langflow_user_id(self, user_id: str) -> bool: + """Check if user_id appears to be a Langflow UUID""" + import re + # Basic UUID pattern check (with or without dashes) + uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$' + return bool(re.match(uuid_pattern, user_id.lower().replace('-', ''))) + + def get_user_type(self, user_id: str) -> str: + """Determine user type: 'google_oauth', 'langflow_direct', or 'unknown'""" + if self.has_binding(user_id): + return "google_oauth" + elif self.is_langflow_user_id(user_id): + return "langflow_direct" + else: + return "unknown" + +# Global instance +user_binding_service = UserBindingService() \ No newline at end of file From 0cd02972477ea7d20139f1b3005c19208b338794 Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Wed, 3 Sep 2025 17:42:28 -0300 
Subject: [PATCH 02/32] ✨ (agent.py): Add functionality to claim session
 ownership for Google users in async_langflow_chat and
 async_langflow_chat_stream functions 🔧 (chat_service.py): Refactor
 conversation deduplication logic and source statistics calculation for
 better performance and accuracy 🔧 (langflow_history_service.py): Implement
 session ownership filtering for Google users and enhance session ownership
 tracking functionality 🔧 (session_ownership_service.py): Create
 SessionOwnershipService to track session ownership for proper message
 history separation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/agent.py                              |  38 ++++++++
 src/services/chat_service.py              |  51 ++++++++--
 src/services/langflow_history_service.py  |  21 +++-
 src/services/session_ownership_service.py | 111 ++++++++++++++++++++++
 4 files changed, 211 insertions(+), 10 deletions(-)
 create mode 100644 src/services/session_ownership_service.py

diff --git a/src/agent.py b/src/agent.py
index ccd12579..a2059310 100644
--- a/src/agent.py
+++ b/src/agent.py
@@ -434,6 +434,25 @@ async def async_langflow_chat(
         if response_id:
             conversation_state["last_activity"] = datetime.now()
             store_conversation_thread(user_id, response_id, conversation_state)
+
+            # Claim session ownership if this is a Google user
+            try:
+                from services.session_ownership_service import session_ownership_service
+                from services.user_binding_service import user_binding_service
+
+                # Check if this is a Google user (has binding but not UUID format)
+                import re
+                uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$'
+                is_uuid = bool(re.match(uuid_pattern, user_id.lower().replace('-', '')))
+
+                if not is_uuid and user_binding_service.has_binding(user_id):
+                    langflow_user_id = user_binding_service.get_langflow_user_id(user_id)
+                    if langflow_user_id:
+                        session_ownership_service.claim_session(user_id, response_id, langflow_user_id)
+                        print(f"[DEBUG] Claimed session {response_id} for Google user {user_id}")
+            except Exception as e:
+                print(f"[WARNING] Failed to claim session ownership: {e}")
+
         print(
             f"[DEBUG] Stored langflow conversation thread for user {user_id} with response_id: {response_id}"
         )
@@ -513,6 +532,25 @@ async def async_langflow_chat_stream(
         if response_id:
             conversation_state["last_activity"] = datetime.now()
             store_conversation_thread(user_id, response_id, conversation_state)
+
+            # Claim session ownership if this is a Google user
+            try:
+                from services.session_ownership_service import session_ownership_service
+                from services.user_binding_service import user_binding_service
+
+                # Check if this is a Google user (has binding but not UUID format)
+                import re
+                uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$'
+                is_uuid = bool(re.match(uuid_pattern, user_id.lower().replace('-', '')))
+
+                if not is_uuid and user_binding_service.has_binding(user_id):
+                    langflow_user_id = user_binding_service.get_langflow_user_id(user_id)
+                    if langflow_user_id:
+                        session_ownership_service.claim_session(user_id, response_id, langflow_user_id)
+                        print(f"[DEBUG] Claimed session {response_id} for Google user {user_id} (streaming)")
+            except Exception 
as e: + print(f"[WARNING] Failed to claim session ownership (streaming): {e}") + print( f"[DEBUG] Stored langflow conversation thread for user {user_id} with response_id: {response_id}" ) diff --git a/src/services/chat_service.py b/src/services/chat_service.py index ca2dd4d6..2ec35807 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -377,16 +377,51 @@ class ChatService: print(f"[ERROR] Failed to fetch Langflow history: {e}") # Continue with just in-memory conversations - # Sort all conversations by last activity (most recent first) - all_conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True) + # Deduplicate conversations by response_id (in-memory takes priority over database) + deduplicated_conversations = {} + + for conversation in all_conversations: + response_id = conversation.get("response_id") + if response_id: + if response_id not in deduplicated_conversations: + # First occurrence - add it + deduplicated_conversations[response_id] = conversation + else: + # Duplicate found - prioritize in-memory (more recent) over database + existing = deduplicated_conversations[response_id] + current_source = conversation.get("source") + existing_source = existing.get("source") + + if current_source == "openrag_memory" and existing_source == "langflow_database": + # Replace database version with in-memory version + deduplicated_conversations[response_id] = conversation + print(f"[DEBUG] Replaced database conversation {response_id} with in-memory version") + # Otherwise keep existing (in-memory has priority, or first database entry) + else: + # No response_id - add with unique key based on content and timestamp + unique_key = f"no_id_{hash(conversation.get('title', ''))}{conversation.get('created_at', '')}" + if unique_key not in deduplicated_conversations: + deduplicated_conversations[unique_key] = conversation + + final_conversations = list(deduplicated_conversations.values()) + + # Sort by last activity (most recent first) + final_conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True) + + # Calculate source statistics after deduplication + sources = { + "memory": len([c for c in final_conversations if c.get("source") == "openrag_memory"]), + "langflow_db": len([c for c in final_conversations if c.get("source") == "langflow_database"]), + "duplicates_removed": len(all_conversations) - len(final_conversations) + } + + if sources["duplicates_removed"] > 0: + print(f"[DEBUG] Removed {sources['duplicates_removed']} duplicate conversations") return { "user_id": user_id, "endpoint": "langflow", - "conversations": all_conversations, - "total_conversations": len(all_conversations), - "sources": { - "memory": len([c for c in all_conversations if c.get("source") == "openrag_memory"]), - "langflow_db": len([c for c in all_conversations if c.get("source") == "langflow_database"]) - } + "conversations": final_conversations, + "total_conversations": len(final_conversations), + "sources": sources } diff --git a/src/services/langflow_history_service.py b/src/services/langflow_history_service.py index 85f20b3f..e6e49f4d 100644 --- a/src/services/langflow_history_service.py +++ b/src/services/langflow_history_service.py @@ -10,6 +10,7 @@ from datetime import datetime from config.settings import LANGFLOW_URL, LANGFLOW_KEY, LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD from services.user_binding_service import user_binding_service +from services.session_ownership_service import session_ownership_service class LangflowHistoryService: @@ -49,6 
+50,18 @@ class LangflowHistoryService: uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$' return bool(re.match(uuid_pattern, user_id.lower().replace('-', ''))) + def _filter_sessions_by_ownership(self, session_ids: List[str], user_id: str, langflow_user_id: str) -> List[str]: + """Filter sessions based on user type and ownership""" + if self._is_uuid_format(user_id): + # Direct Langflow user - show all sessions for this Langflow user + print(f"[DEBUG] Direct Langflow user - showing all {len(session_ids)} sessions") + return session_ids + else: + # Google OAuth user - only show sessions they own + owned_sessions = session_ownership_service.filter_sessions_for_google_user(session_ids, user_id) + print(f"[DEBUG] Google user {user_id} owns {len(owned_sessions)} out of {len(session_ids)} total sessions") + return owned_sessions + async def _authenticate(self) -> Optional[str]: """Authenticate with Langflow and get access token""" if self.auth_token: @@ -119,8 +132,12 @@ class LangflowHistoryService: # Filter sessions to only include those belonging to the user user_sessions = await self._filter_sessions_by_user(session_ids, langflow_user_id, token) - print(f"Found {len(user_sessions)} sessions for user {user_id} (Langflow ID: {langflow_user_id})") - return user_sessions + + # Apply ownership-based filtering for Google users + filtered_sessions = self._filter_sessions_by_ownership(user_sessions, user_id, langflow_user_id) + + print(f"Found {len(filtered_sessions)} sessions for user {user_id} (Langflow ID: {langflow_user_id})") + return filtered_sessions else: print(f"Failed to get sessions: {response.status_code} - {response.text}") return [] diff --git a/src/services/session_ownership_service.py b/src/services/session_ownership_service.py new file mode 100644 index 00000000..b3a214d9 --- /dev/null +++ b/src/services/session_ownership_service.py @@ -0,0 +1,111 @@ +""" +Session Ownership Service +Tracks which Google user owns which Langflow session to properly separate message history +""" + +import json +import os +from typing import Dict, List, Optional, Set +from datetime import datetime + + +class SessionOwnershipService: + """Service to track session ownership for proper message history separation""" + + def __init__(self): + self.ownership_file = "session_ownership.json" + self.ownership_data = self._load_ownership_data() + + def _load_ownership_data(self) -> Dict[str, Dict[str, any]]: + """Load session ownership data from JSON file""" + if os.path.exists(self.ownership_file): + try: + with open(self.ownership_file, 'r') as f: + return json.load(f) + except Exception as e: + print(f"Error loading session ownership data: {e}") + return {} + return {} + + def _save_ownership_data(self): + """Save session ownership data to JSON file""" + try: + with open(self.ownership_file, 'w') as f: + json.dump(self.ownership_data, f, indent=2) + print(f"Saved session ownership data to {self.ownership_file}") + except Exception as e: + print(f"Error saving session ownership data: {e}") + + def claim_session(self, google_user_id: str, langflow_session_id: str, langflow_user_id: str): + """Claim a Langflow session for a Google user""" + if langflow_session_id not in self.ownership_data: + self.ownership_data[langflow_session_id] = { + "google_user_id": google_user_id, + "langflow_user_id": langflow_user_id, + "created_at": datetime.now().isoformat(), + "last_accessed": datetime.now().isoformat() + } + self._save_ownership_data() + print(f"Claimed session 
{langflow_session_id} for Google user {google_user_id}")
+        else:
+            # Update last accessed time
+            self.ownership_data[langflow_session_id]["last_accessed"] = datetime.now().isoformat()
+            self._save_ownership_data()
+
+    def get_session_owner(self, langflow_session_id: str) -> Optional[str]:
+        """Get the Google user ID that owns a Langflow session"""
+        session_data = self.ownership_data.get(langflow_session_id)
+        return session_data.get("google_user_id") if session_data else None
+
+    def get_user_sessions(self, google_user_id: str) -> List[str]:
+        """Get all Langflow sessions owned by a Google user"""
+        return [
+            session_id
+            for session_id, session_data in self.ownership_data.items()
+            if session_data.get("google_user_id") == google_user_id
+        ]
+
+    def get_unowned_sessions_for_langflow_user(self, langflow_user_id: str) -> Set[str]:
+        """Get the session IDs for a Langflow user that are already claimed by a Google user
+
+        Resolving truly unowned sessions requires querying the Langflow database
+        for all of the user's sessions; callers can subtract this claimed set
+        from that full list.
+        """
+        # TODO: needs the full session list from Langflow to return truly unowned sessions
+        claimed_sessions = set()
+        for session_id, session_data in self.ownership_data.items():
+            if session_data.get("langflow_user_id") == langflow_user_id:
+                claimed_sessions.add(session_id)
+        return claimed_sessions
+
+    def filter_sessions_for_google_user(self, all_sessions: List[str], google_user_id: str) -> List[str]:
+        """Filter a list of sessions to only include those owned by the Google user"""
+        user_sessions = self.get_user_sessions(google_user_id)
+        return [session for session in all_sessions if session in user_sessions]
+
+    def is_session_owned_by_google_user(self, langflow_session_id: str, google_user_id: str) -> bool:
+        """Check if a session is owned by a specific Google user"""
+        return self.get_session_owner(langflow_session_id) == google_user_id
+
+    def get_ownership_stats(self) -> Dict[str, any]:
+        """Get statistics about session ownership"""
+        google_users = set()
+        langflow_users = set()
+
+        for session_data in self.ownership_data.values():
+            google_users.add(session_data.get("google_user_id"))
+            langflow_users.add(session_data.get("langflow_user_id"))
+
+        return {
+            "total_tracked_sessions": len(self.ownership_data),
+            "unique_google_users": len(google_users),
+            "unique_langflow_users": len(langflow_users),
+            "sessions_per_google_user": {
+                google_user: len(self.get_user_sessions(google_user))
+                for google_user in google_users
+            }
+        }
+
+
+# Global instance
+session_ownership_service = SessionOwnershipService()
\ No newline at end of file

From 64edbd8eedcac53f1c3ba1cdecb53dcecdb91056 Mon Sep 17 00:00:00 2001
From: Eric Hare
Date: Wed, 3 Sep 2025 14:11:32 -0700
Subject: [PATCH 03/32] feat: Google Drive picker and enhancements

---
 .gitignore                                    |    1 +
 .../src/app/connectors/GoogleDrivePicker.tsx  |  117 ++
 frontend/src/app/connectors/page.tsx          |  503 +-----
 frontend/src/app/settings/page.tsx            |   50 +-
 src/config/settings.py                        |    1 -
 src/connectors/google_drive/connector.py      | 1351 +++++++++++------
 src/connectors/google_drive/oauth.py          |    9 +-
 src/services/auth_service.py                  |   26 +-
 8 files changed, 1041 insertions(+), 1017 deletions(-)
 create mode 100644 frontend/src/app/connectors/GoogleDrivePicker.tsx

diff --git a/.gitignore b/.gitignore
index b2977194..4f22035a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,4 @@ wheels/
 
 1001*.pdf
 *.json
+.DS_Store
diff --git a/frontend/src/app/connectors/GoogleDrivePicker.tsx 
b/frontend/src/app/connectors/GoogleDrivePicker.tsx new file mode 100644 index 00000000..7723ca1e --- /dev/null +++ b/frontend/src/app/connectors/GoogleDrivePicker.tsx @@ -0,0 +1,117 @@ +"use client" + +import { useCallback, useState } from "react" +import { Button } from "@/components/ui/button" +import { Badge } from "@/components/ui/badge" + +// declare globals to silence TS +declare global { + interface Window { google?: any; gapi?: any } +} + +const loadScript = (src: string) => + new Promise((resolve, reject) => { + if (document.querySelector(`script[src="${src}"]`)) return resolve() + const s = document.createElement("script") + s.src = src + s.async = true + s.onload = () => resolve() + s.onerror = () => reject(new Error(`Failed to load ${src}`)) + document.head.appendChild(s) + }) + +export type DriveSelection = { files: string[]; folders: string[] } + +export function GoogleDrivePicker({ + value, + onChange, + buttonLabel = "Choose in Drive", +}: { + value?: DriveSelection + onChange: (sel: DriveSelection) => void + buttonLabel?: string +}) { + const [loading, setLoading] = useState(false) + + const ensureGoogleApis = useCallback(async () => { + await loadScript("https://accounts.google.com/gsi/client") + await loadScript("https://apis.google.com/js/api.js") + await new Promise((res) => window.gapi?.load("picker", () => res())) + }, []) + + const openPicker = useCallback(async () => { + const clientId = process.env.NEXT_PUBLIC_GOOGLE_CLIENT_ID + const apiKey = process.env.NEXT_PUBLIC_GOOGLE_API_KEY + if (!clientId || !apiKey) { + alert("Google Picker requires NEXT_PUBLIC_GOOGLE_CLIENT_ID and NEXT_PUBLIC_GOOGLE_API_KEY") + return + } + try { + setLoading(true) + await ensureGoogleApis() + const tokenClient = window.google.accounts.oauth2.initTokenClient({ + client_id: clientId, + scope: "https://www.googleapis.com/auth/drive.readonly https://www.googleapis.com/auth/drive.metadata.readonly", + callback: (tokenResp: any) => { + const viewDocs = new window.google.picker.DocsView() + .setIncludeFolders(true) + .setSelectFolderEnabled(true) + + console.log("Picker using clientId:", clientId, "apiKey:", apiKey) + + const picker = new window.google.picker.PickerBuilder() + .enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED) + .setOAuthToken(tokenResp.access_token) + .setDeveloperKey(apiKey) + .addView(viewDocs) + .setCallback((data: any) => { + if (data.action === window.google.picker.Action.PICKED) { + const pickedFiles: string[] = [] + const pickedFolders: string[] = [] + for (const doc of data.docs || []) { + const id = doc.id + const isFolder = doc?.type === "folder" || doc?.mimeType === "application/vnd.google-apps.folder" + if (isFolder) pickedFolders.push(id) + else pickedFiles.push(id) + } + onChange({ files: pickedFiles, folders: pickedFolders }) + } + }) + .build() + picker.setVisible(true) + }, + }) + tokenClient.requestAccessToken() + } catch (e) { + console.error("Drive Picker error", e) + alert("Failed to open Google Drive Picker. See console.") + } finally { + setLoading(false) + } + }, [ensureGoogleApis, onChange]) + + const filesCount = value?.files?.length ?? 0 + const foldersCount = value?.folders?.length ?? 0 + + return ( +
+    <div className="space-y-2">
+      <div className="flex items-center gap-2">
+        <Button type="button" variant="outline" onClick={openPicker} disabled={loading}>
+          {loading ? "Opening…" : buttonLabel}
+        </Button>
+        {(filesCount > 0 || foldersCount > 0) && (
+          <Badge variant="secondary">
+            {filesCount} file(s), {foldersCount} folder(s) selected
+          </Badge>
+        )}
+      </div>
+      {(filesCount > 0 || foldersCount > 0) && (
+        <div className="flex flex-wrap gap-1">
+          {value!.files.slice(0, 6).map((id) => (
+            <Badge key={id} variant="outline">file:{id}</Badge>
+          ))}
+          {filesCount > 6 && <Badge variant="outline">+{filesCount - 6} more</Badge>}
+          {value!.folders.slice(0, 6).map((id) => (
+            <Badge key={id} variant="outline">folder:{id}</Badge>
+          ))}
+          {foldersCount > 6 && <Badge variant="outline">+{foldersCount - 6} more</Badge>}
+        </div>
+      )}
+    </div>
+  )
+}
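
Note: the picker reads browser-visible credentials; a sketch of the expected
.env.local entries (placeholder values, variable names taken from the checks in
openPicker above):

    NEXT_PUBLIC_GOOGLE_CLIENT_ID=<oauth-client-id>
    NEXT_PUBLIC_GOOGLE_API_KEY=<picker-api-key>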

diff --git a/frontend/src/app/connectors/page.tsx b/frontend/src/app/connectors/page.tsx
index 3516338d..432d5d0d 100644
--- a/frontend/src/app/connectors/page.tsx
+++ b/frontend/src/app/connectors/page.tsx
@@ -1,495 +1,14 @@
-"use client"
+import React, { useState } from "react";
+import { GoogleDrivePicker, type DriveSelection } from "./GoogleDrivePicker"
-
-import { useState, useEffect, useCallback, Suspense } from "react"
-import { useSearchParams } from "next/navigation"
-import { Button } from "@/components/ui/button"
-import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"
-import { Badge } from "@/components/ui/badge"
-import { Input } from "@/components/ui/input"
-import { Label } from "@/components/ui/label"
-import { Loader2, PlugZap, CheckCircle, XCircle, RefreshCw, Download, AlertCircle } from "lucide-react"
-import { useAuth } from "@/contexts/auth-context"
-import { useTask } from "@/contexts/task-context"
-import { ProtectedRoute } from "@/components/protected-route"
+
+const [driveSelection, setDriveSelection] = useState<DriveSelection>({ files: [], folders: [] });
+
+// in JSX
+<GoogleDrivePicker value={driveSelection} onChange={setDriveSelection} />
-
-interface Connector {
-  id: string
-  name: string
-  description: string
-  icon: React.ReactNode
-  status: "not_connected" | "connecting" | "connected" | "error"
-  type: string
-  connectionId?: string // Store the active connection ID for syncing
-  access_token?: string // For connectors that use OAuth
-}
-
-interface SyncResult {
-  processed?: number;
-  added?: number;
-  skipped?: number;
-  errors?: number;
-  error?: string;
-  message?: string; // For sync started messages
-  isStarted?: boolean; // For sync started state
-}
-
-interface Connection {
-  connection_id: string
-  name: string
-  is_active: boolean
-  created_at: string
-  last_sync?: string
-}
-
-function ConnectorsPage() {
-  const { isAuthenticated } = useAuth()
-  const { addTask, refreshTasks } = useTask()
-  const searchParams = useSearchParams()
-  const [connectors, setConnectors] = useState<Connector[]>([])
-
-  const [isConnecting, setIsConnecting] = useState<string | null>(null)
-  const [isSyncing, setIsSyncing] = useState<string | null>(null)
-  const [syncResults, setSyncResults] = useState<{[key: string]: SyncResult | null}>({})
-  const [maxFiles, setMaxFiles] = useState(10)
-
-  // Helper function to get connector icon
-  const getConnectorIcon = (iconName: string) => {
-    const iconMap: { [key: string]: React.ReactElement } = {
-      'google-drive':
G
, - 'sharepoint':
SP
, - 'onedrive':
OD
, - } - return iconMap[iconName] ||
?
- } - - // Function definitions first - const checkConnectorStatuses = useCallback(async () => { - try { - // Fetch available connectors from backend - const connectorsResponse = await fetch('/api/connectors') - if (!connectorsResponse.ok) { - throw new Error('Failed to load connectors') - } - - const connectorsResult = await connectorsResponse.json() - const connectorTypes = Object.keys(connectorsResult.connectors) - - // Initialize connectors list with metadata from backend - const initialConnectors = connectorTypes - .filter(type => connectorsResult.connectors[type].available) // Only show available connectors - .map(type => ({ - id: type, - name: connectorsResult.connectors[type].name, - description: connectorsResult.connectors[type].description, - icon: getConnectorIcon(connectorsResult.connectors[type].icon), - status: "not_connected" as const, - type: type - })) - - setConnectors(initialConnectors) - - // Check status for each connector type - - for (const connectorType of connectorTypes) { - const response = await fetch(`/api/connectors/${connectorType}/status`) - if (response.ok) { - const data = await response.json() - const connections = data.connections || [] - const activeConnection = connections.find((conn: Connection) => conn.is_active) - const isConnected = activeConnection !== undefined - - setConnectors(prev => prev.map(c => - c.type === connectorType - ? { - ...c, - status: isConnected ? "connected" : "not_connected", - connectionId: activeConnection?.connection_id - } - : c - )) - } - } - } catch (error) { - console.error('Failed to check connector statuses:', error) - } - }, [setConnectors]) - - const handleConnect = async (connector: Connector) => { - setIsConnecting(connector.id) - setConnectors(prev => prev.map(c => - c.id === connector.id ? { ...c, status: "connecting" } : c - )) - - try { - // Use the shared auth callback URL, not a separate connectors callback - const redirectUri = `${window.location.origin}/auth/callback` - - const response = await fetch('/api/auth/init', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - connector_type: connector.type, - purpose: "data_source", - name: `${connector.name} Connection`, - redirect_uri: redirectUri - }), - }) - - const result = await response.json() - - if (response.ok) { - // Store connector ID for callback - localStorage.setItem('connecting_connector_id', result.connection_id) - localStorage.setItem('connecting_connector_type', connector.type) - - // Handle client-side OAuth with Google's library - if (result.oauth_config) { - // Use the redirect URI provided by the backend - const authUrl = `${result.oauth_config.authorization_endpoint}?` + - `client_id=${result.oauth_config.client_id}&` + - `response_type=code&` + - `scope=${result.oauth_config.scopes.join(' ')}&` + - `redirect_uri=${encodeURIComponent(result.oauth_config.redirect_uri)}&` + - `access_type=offline&` + - `prompt=consent&` + - `state=${result.connection_id}` - - window.location.href = authUrl - } - } else { - throw new Error(result.error || 'Failed to initialize OAuth') - } - } catch (error) { - console.error('OAuth initialization failed:', error) - setConnectors(prev => prev.map(c => - c.id === connector.id ? 
{ ...c, status: "error" } : c - )) - } finally { - setIsConnecting(null) - } - } - - const handleSync = async (connector: Connector) => { - if (!connector.connectionId) { - console.error('No connection ID available for connector') - return - } - - setIsSyncing(connector.id) - setSyncResults(prev => ({ ...prev, [connector.id]: null })) // Clear any existing progress - - try { - const response = await fetch(`/api/connectors/${connector.type}/sync`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - max_files: maxFiles - }), - }) - - const result = await response.json() - - if (response.status === 201 && result.task_id) { - // Task-based sync, use centralized tracking - addTask(result.task_id) - console.log(`Sync task ${result.task_id} added to central tracking for connector ${connector.id}`) - - // Immediately refresh task notifications to show the new task - await refreshTasks() - - // Show sync started message - setSyncResults(prev => ({ - ...prev, - [connector.id]: { - message: "Check task notification panel for progress", - isStarted: true - } - })) - setIsSyncing(null) - } else if (response.ok) { - // Direct sync result - still show "sync started" message - setSyncResults(prev => ({ - ...prev, - [connector.id]: { - message: "Check task notification panel for progress", - isStarted: true - } - })) - setIsSyncing(null) - } else { - throw new Error(result.error || 'Sync failed') - } - } catch (error) { - console.error('Sync failed:', error) - setSyncResults(prev => ({ - ...prev, - [connector.id]: { - error: error instanceof Error ? error.message : 'Sync failed' - } - })) - setIsSyncing(null) - } - } - - const handleDisconnect = async (connector: Connector) => { - // This would call a disconnect endpoint when implemented - setConnectors(prev => prev.map(c => - c.id === connector.id ? { ...c, status: "not_connected", connectionId: undefined } : c - )) - setSyncResults(prev => ({ ...prev, [connector.id]: null })) - } - - const getStatusIcon = (status: Connector['status']) => { - switch (status) { - case "connected": - return - case "connecting": - return - case "error": - return - default: - return - } - } - - const getStatusBadge = (status: Connector['status']) => { - switch (status) { - case "connected": - return Connected - case "connecting": - return Connecting... - case "error": - return Error - default: - return Not Connected - } - } - - // Check connector status on mount and when returning from OAuth - useEffect(() => { - if (isAuthenticated) { - checkConnectorStatuses() - } - - // If we just returned from OAuth, clear the URL parameter - if (searchParams.get('oauth_success') === 'true') { - // Clear the URL parameter without causing a page reload - const url = new URL(window.location.href) - url.searchParams.delete('oauth_success') - window.history.replaceState({}, '', url.toString()) - } - }, [searchParams, isAuthenticated, checkConnectorStatuses]) - - return ( -
-
-

Connectors

-

- Connect external services to automatically sync and index your documents -

-
- - {/* Sync Settings */} - - - - - Sync Settings - - - Configure how many files to sync when manually triggering a sync - - - -
-
- - setMaxFiles(parseInt(e.target.value) || 10)} - className="w-24" - min="1" - max="100" - /> - - (Leave blank or set to 0 for unlimited) - -
-
-
-
- - {/* Connectors Grid */} -
- {connectors.map((connector) => ( - - -
-
- {connector.icon} -
- {connector.name} -
- {getStatusIcon(connector.status)} - {getStatusBadge(connector.status)} -
-
-
-
- - {connector.description} - -
- -
- {connector.status === "not_connected" && ( - - )} - - {connector.status === "connected" && ( - <> - - - - )} - - {connector.status === "error" && ( - - )} -
- - {/* Sync Results */} - {syncResults[connector.id] && ( -
- {syncResults[connector.id]?.isStarted && ( -
-
- - Task initiated: -
-
- {syncResults[connector.id]?.message} -
-
- )} - {syncResults[connector.id]?.error && ( -
-
- - Sync Failed -
-
- {syncResults[connector.id]?.error} -
-
- )} -
- )} -
-
- ))} -
- - {/* Coming Soon Section */} - - - Coming Soon - - Additional connectors are in development - - - -
-
-
D
-
-
Dropbox
-
File storage
-
-
-
-
O
-
-
OneDrive
-
Microsoft cloud storage
-
-
-
-
B
-
-
Box
-
Enterprise file sharing
-
-
-
-
-
-
- ) -} - -export default function ProtectedConnectorsPage() { - return ( - - Loading connectors...
}> - - - - ) -} \ No newline at end of file +// when calling sync: +const body: { file_ids: string[]; folder_ids: string[]; recursive: boolean } = { + file_ids: driveSelection.files, + folder_ids: driveSelection.folders, + recursive: true, +}; diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index c42cbeb8..cbc17449 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -12,6 +12,7 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react" import { ProtectedRoute } from "@/components/protected-route" import { useTask } from "@/contexts/task-context" import { useAuth } from "@/contexts/auth-context" +import { GoogleDrivePicker, type DriveSelection } from "../connectors/GoogleDrivePicker" interface Connector { @@ -53,6 +54,7 @@ function KnowledgeSourcesPage() { const [syncResults, setSyncResults] = useState<{[key: string]: SyncResult | null}>({}) const [maxFiles, setMaxFiles] = useState(10) const [syncAllFiles, setSyncAllFiles] = useState(false) + const [driveSelection, setDriveSelection] = useState({ files: [], folders: [] }) // Settings state // Note: backend internal Langflow URL is not needed on the frontend @@ -210,44 +212,45 @@ function KnowledgeSourcesPage() { const handleSync = async (connector: Connector) => { if (!connector.connectionId) return - + setIsSyncing(connector.id) setSyncResults(prev => ({ ...prev, [connector.id]: null })) - + try { + const body: any = { + connection_id: connector.connectionId, + max_files: syncAllFiles ? 0 : (maxFiles || undefined), + } + + if (connector.type === "google-drive") { + body.file_ids = driveSelection.files + body.folder_ids = driveSelection.folders + body.recursive = true // or expose a checkbox if you want + } + const response = await fetch(`/api/connectors/${connector.type}/sync`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - connection_id: connector.connectionId, - max_files: syncAllFiles ? 0 : (maxFiles || undefined) - }), + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), }) - + const result = await response.json() - if (response.status === 201) { const taskId = result.task_id if (taskId) { addTask(taskId) - setSyncResults(prev => ({ - ...prev, - [connector.id]: { - processed: 0, - total: result.total_files || 0 - } + setSyncResults(prev => ({ + ...prev, + [connector.id]: { processed: 0, total: result.total_files || 0 } })) } } else if (response.ok) { setSyncResults(prev => ({ ...prev, [connector.id]: result })) - // Note: Stats will auto-refresh via task completion watcher for async syncs } else { - console.error('Sync failed:', result.error) + console.error("Sync failed:", result.error) } } catch (error) { - console.error('Sync error:', error) + console.error("Sync error:", error) } finally { setIsSyncing(null) } @@ -433,6 +436,9 @@ function KnowledgeSourcesPage() { {connector.status === "connected" ? (
+                  <div className="mt-2">
+                    <GoogleDrivePicker value={driveSelection} onChange={setDriveSelection} />
+                  </div>
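
Note: with a Drive selection present, the handleSync body above produces a request
roughly like the following sketch (connection id illustrative):

    POST /api/connectors/google-drive/sync
    {"connection_id": "abc123", "max_files": 0, "recursive": true,
     "file_ids": ["<picked-file-id>"], "folder_ids": ["<picked-folder-id>"]}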
diff --git a/src/config/settings.py b/src/config/settings.py index 546c15aa..ff53d453 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -1,6 +1,5 @@ import os import requests -import asyncio import time from dotenv import load_dotenv from opensearchpy import AsyncOpenSearch diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index cf370109..5a8099e0 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -1,578 +1,945 @@ -import asyncio import io import os -import uuid -from datetime import datetime -from typing import Dict, List, Any, Optional -from googleapiclient.discovery import build +from pathlib import Path +import time +from collections import deque +from dataclasses import dataclass +from typing import Dict, List, Any, Optional, Iterable, Set + from googleapiclient.errors import HttpError from googleapiclient.http import MediaIoBaseDownload +# Project-specific base types (adjust imports to your project) from ..base import BaseConnector, ConnectorDocument, DocumentACL from .oauth import GoogleDriveOAuth -# Global worker service cache for process pools -_worker_drive_service = None - - -def get_worker_drive_service(client_id: str, client_secret: str, token_file: str): - """Get or create a Google Drive service instance for this worker process""" - global _worker_drive_service - if _worker_drive_service is None: - print( - f"🔧 Initializing Google Drive service in worker process (PID: {os.getpid()})" - ) - - # Create OAuth instance and load credentials in worker - from .oauth import GoogleDriveOAuth - - oauth = GoogleDriveOAuth( - client_id=client_id, client_secret=client_secret, token_file=token_file - ) - - # Load credentials synchronously in worker - import asyncio - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - loop.run_until_complete(oauth.load_credentials()) - _worker_drive_service = oauth.get_service() - print( - f"✅ Google Drive service ready in worker process (PID: {os.getpid()})" - ) - finally: - loop.close() - - return _worker_drive_service - - -# Module-level functions for process pool execution (must be pickleable) -def _sync_list_files_worker( - client_id, client_secret, token_file, query, page_token, page_size -): - """Worker function for listing files in process pool""" - service = get_worker_drive_service(client_id, client_secret, token_file) - return ( - service.files() - .list( - q=query, - pageSize=page_size, - pageToken=page_token, - fields="nextPageToken, files(id, name, mimeType, modifiedTime, createdTime, webViewLink, permissions, owners)", - ) - .execute() - ) - - -def _sync_get_metadata_worker(client_id, client_secret, token_file, file_id): - """Worker function for getting file metadata in process pool""" - service = get_worker_drive_service(client_id, client_secret, token_file) - return ( - service.files() - .get( - fileId=file_id, - fields="id, name, mimeType, modifiedTime, createdTime, webViewLink, permissions, owners, size", - ) - .execute() - ) - - -def _sync_download_worker( - client_id, client_secret, token_file, file_id, mime_type, file_size=None -): - """Worker function for downloading files in process pool""" - import signal - import time - - # File size limits (in bytes) - MAX_REGULAR_FILE_SIZE = 100 * 1024 * 1024 # 100MB for regular files - MAX_GOOGLE_WORKSPACE_SIZE = ( - 50 * 1024 * 1024 - ) # 50MB for Google Workspace docs (they can't be streamed) - - # Check file size limits - if file_size: - if ( - 
mime_type.startswith("application/vnd.google-apps.") - and file_size > MAX_GOOGLE_WORKSPACE_SIZE - ): - raise ValueError( - f"Google Workspace file too large: {file_size} bytes (max {MAX_GOOGLE_WORKSPACE_SIZE})" - ) - elif ( - not mime_type.startswith("application/vnd.google-apps.") - and file_size > MAX_REGULAR_FILE_SIZE - ): - raise ValueError( - f"File too large: {file_size} bytes (max {MAX_REGULAR_FILE_SIZE})" - ) - - # Dynamic timeout based on file size (minimum 60s, 10s per MB, max 300s) - if file_size: - file_size_mb = file_size / (1024 * 1024) - timeout_seconds = min(300, max(60, int(file_size_mb * 10))) - else: - timeout_seconds = 60 # Default timeout if size unknown - - # Set a timeout for the entire download operation - def timeout_handler(signum, frame): - raise TimeoutError(f"File download timed out after {timeout_seconds} seconds") - - signal.signal(signal.SIGALRM, timeout_handler) - signal.alarm(timeout_seconds) - - try: - service = get_worker_drive_service(client_id, client_secret, token_file) - - # For Google native formats, export as PDF - if mime_type.startswith("application/vnd.google-apps."): - export_format = "application/pdf" - request = service.files().export_media( - fileId=file_id, mimeType=export_format - ) - else: - # For regular files, download directly - request = service.files().get_media(fileId=file_id) - - # Download file with chunked approach - file_io = io.BytesIO() - downloader = MediaIoBaseDownload( - file_io, request, chunksize=1024 * 1024 - ) # 1MB chunks - - done = False - retry_count = 0 - max_retries = 2 - - while not done and retry_count < max_retries: - try: - status, done = downloader.next_chunk() - retry_count = 0 # Reset retry count on successful chunk - except Exception as e: - retry_count += 1 - if retry_count >= max_retries: - raise e - time.sleep(1) # Brief pause before retry - - return file_io.getvalue() - - finally: - # Cancel the alarm - signal.alarm(0) +# ------------------------- +# Config model +# ------------------------- +@dataclass +class GoogleDriveConfig: + client_id: str + client_secret: str + token_file: str + + # Selective sync + file_ids: Optional[List[str]] = None + folder_ids: Optional[List[str]] = None + recursive: bool = True + + # Shared Drives control + drive_id: Optional[str] = None # when set, we use corpora='drive' + corpora: Optional[str] = None # 'user' | 'drive' | 'domain'; auto-picked if None + + # Optional filtering + include_mime_types: Optional[List[str]] = None + exclude_mime_types: Optional[List[str]] = None + + # Export overrides for Google-native types + export_format_overrides: Optional[Dict[str, str]] = None # mime -> export-mime + + # Changes API state persistence (store these in your DB/kv if needed) + changes_page_token: Optional[str] = None + + # Optional: resource_id for webhook cleanup + resource_id: Optional[str] = None +# ------------------------- +# Connector implementation +# ------------------------- class GoogleDriveConnector(BaseConnector): - """Google Drive connector with OAuth and webhook support""" + """ + Google Drive connector with first-class support for selective sync: + - Sync specific file IDs + - Sync specific folder IDs (optionally recursive) + - Works across My Drive and Shared Drives + - Resolves shortcuts to their targets + - Robust changes page token management - # OAuth environment variables - CLIENT_ID_ENV_VAR = "GOOGLE_OAUTH_CLIENT_ID" - CLIENT_SECRET_ENV_VAR = "GOOGLE_OAUTH_CLIENT_SECRET" + Integration points: + - `BaseConnector` is your project’s base class; minimum 
methods used here: + * self.emit(doc: ConnectorDocument) -> None (or adapt to your ingestion pipeline) + * self.log/info/warn/error (optional) + - Adjust paths, logging, and error handling to match your project style. + """ - # Connector metadata - CONNECTOR_NAME = "Google Drive" - CONNECTOR_DESCRIPTION = "Connect your Google Drive to automatically sync documents" - CONNECTOR_ICON = "google-drive" + # Names of env vars that hold your OAuth client creds + CLIENT_ID_ENV_VAR: str = "GOOGLE_OAUTH_CLIENT_ID" + CLIENT_SECRET_ENV_VAR: str = "GOOGLE_OAUTH_CLIENT_SECRET" - # Supported file types that can be processed by docling - SUPPORTED_MIMETYPES = { - "application/pdf", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx - "application/msword", # .doc - "application/vnd.openxmlformats-officedocument.presentationml.presentation", # .pptx - "application/vnd.ms-powerpoint", # .ppt - "text/plain", - "text/html", - "application/rtf", - # Google Docs native formats - we'll export these - "application/vnd.google-apps.document", # Google Docs -> PDF - "application/vnd.google-apps.presentation", # Google Slides -> PDF - "application/vnd.google-apps.spreadsheet", # Google Sheets -> PDF - } + def log(self, message: str) -> None: + print(message) - def __init__(self, config: Dict[str, Any]): - super().__init__(config) + def emit(self, doc: ConnectorDocument) -> None: + """ + Emit a ConnectorDocument instance. + Override this method to integrate with your ingestion pipeline. + """ + # If BaseConnector has an emit method, call super().emit(doc) + # Otherwise, implement your custom logic here. + print(f"Emitting document: {doc.id} ({doc.filename})") + + def __init__(self, config: Dict[str, Any]) -> None: + # Read from config OR env (backend env, not NEXT_PUBLIC_*): + env_client_id = os.getenv(self.CLIENT_ID_ENV_VAR) + env_client_secret = os.getenv(self.CLIENT_SECRET_ENV_VAR) + + client_id = config.get("client_id") or env_client_id + client_secret = config.get("client_secret") or env_client_secret + + # Token file default (so callback & workers don’t need to pass it) + token_file = config.get("token_file") or os.getenv("GOOGLE_DRIVE_TOKEN_FILE") + if not token_file: + token_file = str(Path.home() / ".config" / "openrag" / "google_drive" / "token.json") + Path(token_file).parent.mkdir(parents=True, exist_ok=True) + + if not isinstance(client_id, str) or not client_id.strip(): + raise RuntimeError( + f"Missing Google Drive OAuth client_id. " + f"Provide config['client_id'] or set {self.CLIENT_ID_ENV_VAR}." + ) + if not isinstance(client_secret, str) or not client_secret.strip(): + raise RuntimeError( + f"Missing Google Drive OAuth client_secret. " + f"Provide config['client_secret'] or set {self.CLIENT_SECRET_ENV_VAR}." 
+ ) + + self.cfg = GoogleDriveConfig( + client_id=client_id, + client_secret=client_secret, + token_file=token_file, + file_ids=config.get("file_ids") or config.get("selected_file_ids"), + folder_ids=config.get("folder_ids") or config.get("selected_folder_ids"), + recursive=bool(config.get("recursive", True)), + drive_id=config.get("drive_id"), + corpora=config.get("corpora"), + include_mime_types=config.get("include_mime_types"), + exclude_mime_types=config.get("exclude_mime_types"), + export_format_overrides=config.get("export_format_overrides"), + changes_page_token=config.get("changes_page_token"), + resource_id=config.get("resource_id"), + ) + + # Build OAuth wrapper; DO NOT load creds here (it's async) self.oauth = GoogleDriveOAuth( - client_id=self.get_client_id(), - client_secret=self.get_client_secret(), - token_file=config.get("token_file", "gdrive_token.json"), + client_id=self.cfg.client_id, + client_secret=self.cfg.client_secret, + token_file=self.cfg.token_file, ) - self.service = None - # Load existing webhook channel ID from config if available - self.webhook_channel_id = config.get("webhook_channel_id") or config.get( - "subscription_id" - ) - # Load existing webhook resource ID (Google Drive requires this to stop a channel) - self.webhook_resource_id = config.get("resource_id") + # Drive client is built in authenticate() + from google.oauth2.credentials import Credentials + self.creds: Optional[Credentials] = None + self.service: Any = None + + # cache of resolved shortcutId -> target file metadata + self._shortcut_cache: Dict[str, Dict[str, Any]] = {} + + # Authentication state + self._authenticated: bool = False + + # ------------------------- + # Helpers + # ------------------------- + @property + def _drives_flags(self) -> Dict[str, Any]: + """ + Common flags for ALL Drive calls to ensure Shared Drives are included. + """ + return dict(supportsAllDrives=True, includeItemsFromAllDrives=True) + + def _pick_corpora_args(self) -> Dict[str, Any]: + """ + Decide corpora/driveId based on config. + + If drive_id is provided, prefer corpora='drive' with that driveId. + Otherwise, default to allDrives (so Shared Drive selections from the Picker still work). + """ + if self.cfg.drive_id: + return {"corpora": "drive", "driveId": self.cfg.drive_id} + if self.cfg.corpora: + return {"corpora": self.cfg.corpora} + # Default to allDrives so Picker selections from Shared Drives work without explicit drive_id + return {"corpora": "allDrives"} + + def _resolve_shortcut(self, file_obj: Dict[str, Any]) -> Dict[str, Any]: + """ + If a file is a shortcut, fetch and return the real target metadata. + """ + if file_obj.get("mimeType") != "application/vnd.google-apps.shortcut": + return file_obj + + target_id = file_obj.get("shortcutDetails", {}).get("targetId") + if not target_id: + return file_obj + + if target_id in self._shortcut_cache: + return self._shortcut_cache[target_id] + + try: + meta = ( + self.service.files() + .get( + fileId=target_id, + fields=( + "id, name, mimeType, modifiedTime, createdTime, size, " + "webViewLink, parents, owners, driveId" + ), + **self._drives_flags, + ) + .execute() + ) + self._shortcut_cache[target_id] = meta + return meta + except HttpError: + # shortcut target not accessible + return file_obj + + def _list_children(self, folder_id: str) -> List[Dict[str, Any]]: + """ + List immediate children of a folder. 
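+        Passes supportsAllDrives/includeItemsFromAllDrives on every request so
+        folders that live in Shared Drives are listed as well.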
+ """ + query = f"'{folder_id}' in parents and trashed = false" + page_token = None + results: List[Dict[str, Any]] = [] + + while True: + resp = ( + self.service.files() + .list( + q=query, + pageSize=1000, + pageToken=page_token, + fields=( + "nextPageToken, files(" + "id, name, mimeType, modifiedTime, createdTime, size, " + "webViewLink, parents, shortcutDetails, driveId)" + ), + **self._drives_flags, + **self._pick_corpora_args(), + ) + .execute() + ) + for f in resp.get("files", []): + results.append(f) + page_token = resp.get("nextPageToken") + if not page_token: + break + + return results + + def _bfs_expand_folders(self, folder_ids: Iterable[str]) -> List[Dict[str, Any]]: + """ + Breadth-first traversal to expand folders to all descendant files (if recursive), + or just immediate children (if not recursive). Folders themselves are returned + as items too, but filtered later. + """ + out: List[Dict[str, Any]] = [] + queue = deque(folder_ids) + + while queue: + fid = queue.popleft() + children = self._list_children(fid) + out.extend(children) + + if self.cfg.recursive: + # Enqueue subfolders + for c in children: + c = self._resolve_shortcut(c) + if c.get("mimeType") == "application/vnd.google-apps.folder": + queue.append(c["id"]) + + return out + + def _get_file_meta_by_id(self, file_id: str) -> Optional[Dict[str, Any]]: + """ + Fetch metadata for a file by ID (resolving shortcuts). + """ + if self.service is None: + raise RuntimeError("Google Drive service is not initialized. Please authenticate first.") + try: + meta = ( + self.service.files() + .get( + fileId=file_id, + fields=( + "id, name, mimeType, modifiedTime, createdTime, size, " + "webViewLink, parents, shortcutDetails, driveId" + ), + **self._drives_flags, + ) + .execute() + ) + return self._resolve_shortcut(meta) + except HttpError: + return None + + def _filter_by_mime(self, items: Iterable[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Apply include/exclude mime filters if configured. + """ + include = set(self.cfg.include_mime_types or []) + exclude = set(self.cfg.exclude_mime_types or []) + + def keep(m: Dict[str, Any]) -> bool: + mt = m.get("mimeType") + if exclude and mt in exclude: + return False + if include and mt not in include: + return False + return True + + return [m for m in items if keep(m)] + + def _iter_selected_items(self) -> List[Dict[str, Any]]: + """ + Return a de-duplicated list of file metadata for the selected scope: + - explicit file_ids + - items inside folder_ids (with optional recursion) + Shortcuts are resolved to their targets automatically. + """ + seen: Set[str] = set() + items: List[Dict[str, Any]] = [] + + # Explicit files + if self.cfg.file_ids: + for fid in self.cfg.file_ids: + meta = self._get_file_meta_by_id(fid) + if meta and meta["id"] not in seen: + seen.add(meta["id"]) + items.append(meta) + + # Folders + if self.cfg.folder_ids: + folder_children = self._bfs_expand_folders(self.cfg.folder_ids) + for meta in folder_children: + meta = self._resolve_shortcut(meta) + if meta.get("id") in seen: + continue + seen.add(meta["id"]) + items.append(meta) + + # If neither file_ids nor folder_ids are set, you could: + # - return [] to force explicit selection + # - OR default to entire drive. 
+ # Here we choose to require explicit selection: + if not self.cfg.file_ids and not self.cfg.folder_ids: + return [] + + items = self._filter_by_mime(items) + # Exclude folders from final emits: + items = [m for m in items if m.get("mimeType") != "application/vnd.google-apps.folder"] + return items + + # ------------------------- + # Download logic + # ------------------------- + def _pick_export_mime(self, source_mime: str) -> Optional[str]: + """ + Choose export mime for Google-native docs if needed. + """ + overrides = self.cfg.export_format_overrides or {} + if source_mime == "application/vnd.google-apps.document": + return overrides.get( + source_mime, + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ) + if source_mime == "application/vnd.google-apps.spreadsheet": + return overrides.get( + source_mime, + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ) + if source_mime == "application/vnd.google-apps.presentation": + return overrides.get( + source_mime, + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ) + # Return None for non-Google-native or unsupported types + return overrides.get(source_mime) + + def _download_file_bytes(self, file_meta: Dict[str, Any]) -> bytes: + """ + Download bytes for a given file (exporting if Google-native). + """ + file_id = file_meta["id"] + mime_type = file_meta.get("mimeType") or "" + + # Google-native: export + export_mime = self._pick_export_mime(mime_type) + if mime_type.startswith("application/vnd.google-apps."): + # default fallback if not overridden + if not export_mime: + export_mime = "application/pdf" + request = self.service.files().export_media(fileId=file_id, mimeType=export_mime) + else: + # Binary download + request = self.service.files().get_media(fileId=file_id) + + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024) + done = False + while not done: + status, done = downloader.next_chunk() + # Optional: you can log progress via status.progress() + + return fh.getvalue() + + # ------------------------- + # Public sync surface + # ------------------------- + # ---- Required by BaseConnector: start OAuth flow async def authenticate(self) -> bool: - """Authenticate with Google Drive""" + """ + Ensure we have valid Google Drive credentials and an authenticated service. + Returns True if ready to use; False otherwise. 
+ """ try: - if await self.oauth.is_authenticated(): - self.service = self.oauth.get_service() - self._authenticated = True - return True - return False + # Load/refresh creds from token file (async) + self.creds = await self.oauth.load_credentials() + + # If still not authenticated, bail (caller should kick off OAuth init) + if not await self.oauth.is_authenticated(): + self.log("authenticate: no valid credentials; run OAuth init/callback first.") + return False + + # Build Drive service from OAuth helper + self.service = self.oauth.get_service() + + # Optional sanity check (small, fast request) + _ = self.service.files().get(fileId="root", fields="id").execute() + self._authenticated = True + return True + except Exception as e: - print(f"Authentication failed: {e}") + self._authenticated = False + self.log(f"GoogleDriveConnector.authenticate failed: {e}") return False - async def setup_subscription(self) -> str: - """Set up Google Drive push notifications""" - if not self._authenticated: - raise ValueError("Not authenticated") + async def list_files(self, page_token: Optional[str] = None, **kwargs) -> Dict[str, Any]: + """ + List files in the currently selected scope (file_ids/folder_ids/recursive). + Returns a dict with 'files' and 'next_page_token'. - # Generate unique channel ID - channel_id = str(uuid.uuid4()) + Since we pre-compute the selected set, pagination is simulated: + - If page_token is None: return all files in one batch. + - Otherwise: return {} and no next_page_token. + """ + try: + items = self._iter_selected_items() - # Set up push notification - # Note: This requires a publicly accessible webhook endpoint - webhook_url = self.config.get("webhook_url") - if not webhook_url: - raise ValueError("webhook_url required in config for subscriptions") + # Simplest: ignore page_token and just dump all + # If you want real pagination, slice items here + if page_token: + return {"files": [], "next_page_token": None} + + return { + "files": items, + "next_page_token": None, # no more pages + } + except Exception as e: + # Optionally log error with your base class logger + try: + self.log(f"GoogleDriveConnector.list_files failed: {e}") + except Exception: + pass + return {"files": [], "next_page_token": None} + + async def get_file_content(self, file_id: str) -> ConnectorDocument: + """ + Fetch a file's metadata and content from Google Drive and wrap it in a ConnectorDocument. 
+ """ + meta = self._get_file_meta_by_id(file_id) + if not meta: + raise FileNotFoundError(f"Google Drive file not found: {file_id}") try: + blob = self._download_file_bytes(meta) + except Exception as e: + # Use your base class logger if available + try: + self.log(f"Download failed for {file_id}: {e}") + except Exception: + pass + raise + + from datetime import datetime + + def parse_datetime(dt_str): + if not dt_str: + return None + try: + # Google Drive returns RFC3339 format + return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ") + except ValueError: + try: + return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ") + except ValueError: + return None + + doc = ConnectorDocument( + id=meta["id"], + filename=meta.get("name", ""), + source_url=meta.get("webViewLink", ""), + created_time=parse_datetime(meta.get("createdTime")), + modified_time=parse_datetime(meta.get("modifiedTime")), + mimetype=str(meta.get("mimeType", "")), + acl=DocumentACL(), # TODO: map Google Drive permissions if you want ACLs + content=blob, + metadata={ + "parents": meta.get("parents"), + "driveId": meta.get("driveId"), + "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None, + }, + ) + return doc + + async def setup_subscription(self) -> str: + """ + Start a Google Drive Changes API watch (webhook). + Returns the channel ID (subscription ID) as a string. + + Requires a webhook URL to be configured. This implementation looks for: + 1) self.cfg.webhook_address (preferred if you have it in your config dataclass) + 2) os.environ["GOOGLE_DRIVE_WEBHOOK_URL"] + """ + import os + + # 1) Ensure we are authenticated and have a live Drive service + ok = await self.authenticate() + if not ok: + raise RuntimeError("GoogleDriveConnector.setup_subscription: not authenticated") + + # 2) Resolve webhook address (no param in ABC, so pull from config/env) + webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv("GOOGLE_DRIVE_WEBHOOK_URL") + if not webhook_address: + raise RuntimeError( + "GoogleDriveConnector.setup_subscription: webhook URL not configured. " + "Set cfg.webhook_address or GOOGLE_DRIVE_WEBHOOK_URL." 
+ ) + + # 3) Ensure we have a starting page token (checkpoint) + try: + if not self.cfg.changes_page_token: + self.cfg.changes_page_token = self.get_start_page_token() + except Exception as e: + # Optional: use your base logger + try: + self.log(f"Failed to get start page token: {e}") + except Exception: + pass + raise + + # 4) Start the watch on the current token + try: + # Build a simple watch body; customize id if you want a stable deterministic value body = { - "id": channel_id, + "id": f"drive-channel-{int(time.time())}", # subscription (channel) ID to return "type": "web_hook", - "address": webhook_url, - "payload": True, - "expiration": str( - int((datetime.now().timestamp() + 86400) * 1000) - ), # 24 hours + "address": webhook_address, } + # Shared Drives flags so we see everything we’re scoped to + flags = dict(supportsAllDrives=True) + result = ( self.service.changes() - .watch(pageToken=self._get_start_page_token(), body=body) + .watch(pageToken=self.cfg.changes_page_token, body=body, **flags) .execute() ) - self.webhook_channel_id = channel_id - # Persist the resourceId returned by Google to allow proper cleanup - try: - self.webhook_resource_id = result.get("resourceId") - except Exception: - self.webhook_resource_id = None + # Example fields: id, resourceId, expiration, kind + channel_id = result.get("id") + resource_id = result.get("resourceId") + expiration = result.get("expiration") + + # Persist in-memory so cleanup can stop this channel later. + # If your project has a persistence layer, save these values there. + self._active_channel = { + "channel_id": channel_id, + "resource_id": resource_id, + "expiration": expiration, + "webhook_address": webhook_address, + "page_token": self.cfg.changes_page_token, + } + + if not isinstance(channel_id, str) or not channel_id: + raise RuntimeError(f"Drive watch returned invalid channel id: {channel_id!r}") + return channel_id - except HttpError as e: - print(f"Failed to set up subscription: {e}") + except Exception as e: + try: + self.log(f"GoogleDriveConnector.setup_subscription failed: {e}") + except Exception: + pass raise - def _get_start_page_token(self) -> str: - """Get the current page token for change notifications""" - return self.service.changes().getStartPageToken().execute()["startPageToken"] - - async def list_files( - self, page_token: Optional[str] = None, limit: Optional[int] = None - ) -> Dict[str, Any]: - """List all supported files in Google Drive. - - Uses a thread pool (not the shared process pool) to avoid issues with - Google API clients in forked processes and adds light retries for - transient BrokenPipe/connection errors. + async def cleanup_subscription(self, subscription_id: str) -> bool: """ - if not self._authenticated: - raise ValueError("Not authenticated") + Stop an active Google Drive Changes API watch (webhook) channel. - # Build query for supported file types - mimetype_query = " or ".join( - [f"mimeType='{mt}'" for mt in self.SUPPORTED_MIMETYPES] - ) - query = f"({mimetype_query}) and trashed=false" + Google requires BOTH the channel id (subscription_id) AND its resource_id. + We try to retrieve resource_id from: + 1) self._active_channel (single-channel use) + 2) self._subscriptions[subscription_id] (multi-channel use, if present) + 3) self.cfg.resource_id (as a last-resort override provided by caller/config) - # Use provided limit or default to 100, max 1000 (Google Drive API limit) - page_size = min(limit or 100, 1000) + Returns: + bool: True if the stop call succeeded, otherwise False. 
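+
+        Example of the state persisted by setup_subscription that this method
+        reads back (assumed shape):
+
+            {"channel_id": "...", "resource_id": "...", "expiration": "..."}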
+ """ + # 1) Ensure auth/service + ok = await self.authenticate() + if not ok: + try: + self.log("cleanup_subscription: not authenticated") + except Exception: + pass + return False - def _sync_list_files_inner(): - import time + # 2) Resolve resource_id + resource_id = None - attempts = 0 - max_attempts = 3 - backoff = 1.0 - while True: - try: - return ( - self.service.files() - .list( - q=query, - pageSize=page_size, - pageToken=page_token, - fields="nextPageToken, files(id, name, mimeType, modifiedTime, createdTime, webViewLink, permissions, owners)", - ) - .execute() - ) - except Exception as e: - attempts += 1 - is_broken_pipe = isinstance(e, BrokenPipeError) or ( - isinstance(e, OSError) and getattr(e, "errno", None) == 32 - ) - if attempts < max_attempts and is_broken_pipe: - time.sleep(backoff) - backoff = min(4.0, backoff * 2) - continue - raise + # Single-channel memory + if getattr(self, "_active_channel", None): + ch = getattr(self, "_active_channel") + if isinstance(ch, dict) and ch.get("channel_id") == subscription_id: + resource_id = ch.get("resource_id") + # Multi-channel memory + if resource_id is None and hasattr(self, "_subscriptions"): + subs = getattr(self, "_subscriptions") + if isinstance(subs, dict): + entry = subs.get(subscription_id) + if isinstance(entry, dict): + resource_id = entry.get("resource_id") + + # Config override (optional) + if resource_id is None and getattr(self.cfg, "resource_id", None): + resource_id = self.cfg.resource_id + + if not resource_id: + try: + self.log( + f"cleanup_subscription: missing resource_id for channel {subscription_id}. " + f"Persist (channel_id, resource_id) when creating the subscription." + ) + except Exception: + pass + return False + + # 3) Call Channels.stop try: - # Offload blocking HTTP call to default ThreadPoolExecutor - import asyncio + self.service.channels().stop(body={"id": subscription_id, "resourceId": resource_id}).execute() - loop = asyncio.get_event_loop() - results = await loop.run_in_executor(None, _sync_list_files_inner) + # 4) Clear local bookkeeping + if getattr(self, "_active_channel", None) and self._active_channel.get("channel_id") == subscription_id: + self._active_channel = {} - files = [] - for file in results.get("files", []): - files.append( - { - "id": file["id"], - "name": file["name"], - "mimeType": file["mimeType"], - "modifiedTime": file["modifiedTime"], - "createdTime": file["createdTime"], - "webViewLink": file["webViewLink"], - "permissions": file.get("permissions", []), - "owners": file.get("owners", []), - } + if hasattr(self, "_subscriptions") and isinstance(self._subscriptions, dict): + self._subscriptions.pop(subscription_id, None) + + return True + + except Exception as e: + try: + self.log(f"cleanup_subscription failed for {subscription_id}: {e}") + except Exception: + pass + return False + + async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]: + """ + Process a Google Drive Changes webhook. + Drive push notifications do NOT include the changed files themselves; they merely tell us + "there are changes". We must pull them using the Changes API with our saved page token. + + Args: + payload: Arbitrary dict your framework passes. We *may* log/use headers like + X-Goog-Resource-State / X-Goog-Message-Number if present, but we don't rely on them. + + Returns: + List[str]: unique list of affected file IDs (filtered to our selected scope). 
+ """ + affected: List[str] = [] + try: + # 1) Ensure we're authenticated / service ready + ok = await self.authenticate() + if not ok: + try: + self.log("handle_webhook: not authenticated") + except Exception: + pass + return affected + + # 2) Establish/restore our checkpoint page token + page_token = self.cfg.changes_page_token + if not page_token: + # First time / missing state: initialize + page_token = self.get_start_page_token() + self.cfg.changes_page_token = page_token + + # 3) Build current selected scope to filter changes + # (file_ids + expanded folder descendants) + try: + selected_items = self._iter_selected_items() + selected_ids = {m["id"] for m in selected_items} + except Exception as e: + selected_ids = set() + try: + self.log(f"handle_webhook: scope build failed, proceeding unfiltered: {e}") + except Exception: + pass + + # 4) Pull changes until nextPageToken is exhausted, then advance to newStartPageToken + while True: + resp = ( + self.service.changes() + .list( + pageToken=page_token, + fields=( + "nextPageToken, newStartPageToken, " + "changes(fileId, file(id, name, mimeType, trashed, parents, " + "shortcutDetails, driveId, modifiedTime, webViewLink))" + ), + supportsAllDrives=True, + includeItemsFromAllDrives=True, + ) + .execute() ) - return {"files": files, "nextPageToken": results.get("nextPageToken")} + for ch in resp.get("changes", []): + fid = ch.get("fileId") + fobj = ch.get("file") or {} - except HttpError as e: - print(f"Failed to list files: {e}") - raise + # Skip if no file or explicitly trashed (you can choose to still return these IDs) + if not fid or fobj.get("trashed"): + # If you want to *include* deletions, collect fid here instead of skipping. + continue - async def get_file_content(self, file_id: str) -> ConnectorDocument: - """Get file content and metadata""" - if not self._authenticated: - raise ValueError("Not authenticated") + # Resolve shortcuts to target + resolved = self._resolve_shortcut(fobj) + rid = resolved.get("id", fid) - try: - # Get file metadata (run in thread pool to avoid blocking) - import asyncio + # Filter to our selected scope if we have one; otherwise accept all + if selected_ids and (rid not in selected_ids): + # Shortcut target might be in scope even if the shortcut isn't + tgt = fobj.get("shortcutDetails", {}).get("targetId") if fobj else None + if not (tgt and tgt in selected_ids): + continue - loop = asyncio.get_event_loop() + affected.append(rid) - # Use the same process pool as docling processing - from utils.process_pool import process_pool + # Handle pagination of the changes feed + next_token = resp.get("nextPageToken") + if next_token: + page_token = next_token + continue - file_metadata = await loop.run_in_executor( - process_pool, - _sync_get_metadata_worker, - self.oauth.client_id, - self.oauth.client_secret, - self.oauth.token_file, - file_id, - ) + # No nextPageToken: checkpoint with newStartPageToken + new_start = resp.get("newStartPageToken") + if new_start: + self.cfg.changes_page_token = new_start + else: + # Fallback: keep the last consumed token if API didn't return newStartPageToken + self.cfg.changes_page_token = page_token + break - # Download file content (pass file size for timeout calculation) - file_size = file_metadata.get("size") - if file_size: - file_size = int(file_size) # Ensure it's an integer - content = await self._download_file_content( - file_id, file_metadata["mimeType"], file_size - ) + # Deduplicate while preserving order + seen = set() + deduped: List[str] = [] + for x in affected: 
+ if x not in seen: + seen.add(x) + deduped.append(x) + return deduped - # Extract ACL information - acl = self._extract_acl(file_metadata) + except Exception as e: + try: + self.log(f"handle_webhook failed: {e}") + except Exception: + pass + return [] - return ConnectorDocument( - id=file_id, - filename=file_metadata["name"], - mimetype=file_metadata["mimeType"], - content=content, - source_url=file_metadata["webViewLink"], - acl=acl, - modified_time=datetime.fromisoformat( - file_metadata["modifiedTime"].replace("Z", "+00:00") - ).replace(tzinfo=None), - created_time=datetime.fromisoformat( - file_metadata["createdTime"].replace("Z", "+00:00") - ).replace(tzinfo=None), + def sync_once(self) -> None: + """ + Perform a one-shot sync of the currently selected scope and emit documents. + + Emits ConnectorDocument instances (adapt to your BaseConnector ingestion). + """ + items = self._iter_selected_items() + for meta in items: + try: + blob = self._download_file_bytes(meta) + except HttpError as e: + # Skip/record failures + self.log(f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}") + continue + + from datetime import datetime + + def parse_datetime(dt_str): + if not dt_str: + return None + try: + # Google Drive returns RFC3339 format + return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ") + except ValueError: + try: + return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ") + except ValueError: + return None + + doc = ConnectorDocument( + id=meta["id"], + filename=meta.get("name", ""), + source_url=meta.get("webViewLink", ""), + created_time=parse_datetime(meta.get("createdTime")), + modified_time=parse_datetime(meta.get("modifiedTime")), + mimetype=str(meta.get("mimeType", "")), + acl=DocumentACL(), # TODO: set appropriate ACL instance or value metadata={ - "size": file_metadata.get("size"), - "owners": file_metadata.get("owners", []), + "name": meta.get("name"), + "webViewLink": meta.get("webViewLink"), + "parents": meta.get("parents"), + "driveId": meta.get("driveId"), + "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None, }, + content=blob, ) + self.emit(doc) - except HttpError as e: - print(f"Failed to get file content: {e}") - raise + # ------------------------- + # Changes API (polling or webhook-backed) + # ------------------------- + def get_start_page_token(self) -> str: + resp = self.service.changes().getStartPageToken(**self._drives_flags).execute() + return resp["startPageToken"] - async def _download_file_content( - self, file_id: str, mime_type: str, file_size: int = None - ) -> bytes: - """Download file content, converting Google Docs formats if needed""" + def poll_changes_and_sync(self) -> Optional[str]: + """ + Incrementally process changes since the last page token in cfg.changes_page_token. - # Download file (run in process pool to avoid blocking) - import asyncio + Returns the new page token you should persist (or None if unchanged). 
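+
+        A typical polling loop (sketch; save_page_token is a hypothetical
+        persistence helper):
+
+            token = connector.poll_changes_and_sync()
+            if token:
+                save_page_token(token)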
+ """ + page_token = self.cfg.changes_page_token or self.get_start_page_token() - loop = asyncio.get_event_loop() - - # Use the same process pool as docling processing - from utils.process_pool import process_pool - - return await loop.run_in_executor( - process_pool, - _sync_download_worker, - self.oauth.client_id, - self.oauth.client_secret, - self.oauth.token_file, - file_id, - mime_type, - file_size, - ) - - def _extract_acl(self, file_metadata: Dict[str, Any]) -> DocumentACL: - """Extract ACL information from file metadata""" - user_permissions = {} - group_permissions = {} - - owner = None - if file_metadata.get("owners"): - owner = file_metadata["owners"][0].get("emailAddress") - - # Process permissions - for perm in file_metadata.get("permissions", []): - email = perm.get("emailAddress") - role = perm.get("role", "reader") - perm_type = perm.get("type") - - if perm_type == "user" and email: - user_permissions[email] = role - elif perm_type == "group" and email: - group_permissions[email] = role - elif perm_type == "domain": - # Domain-wide permissions - could be treated as a group - domain = perm.get("domain", "unknown-domain") - group_permissions[f"domain:{domain}"] = role - - return DocumentACL( - owner=owner, - user_permissions=user_permissions, - group_permissions=group_permissions, - ) - - def extract_webhook_channel_id( - self, payload: Dict[str, Any], headers: Dict[str, str] - ) -> Optional[str]: - """Extract Google Drive channel ID from webhook headers""" - return headers.get("x-goog-channel-id") - - def extract_webhook_resource_id(self, headers: Dict[str, str]) -> Optional[str]: - """Extract Google Drive resource ID from webhook headers""" - return headers.get("x-goog-resource-id") - - async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]: - """Handle Google Drive webhook notification""" - if not self._authenticated: - raise ValueError("Not authenticated") - - # Google Drive sends headers with the important info - headers = payload.get("_headers", {}) - - # Extract Google Drive specific headers - channel_id = headers.get("x-goog-channel-id") - resource_state = headers.get("x-goog-resource-state") - - if not channel_id: - print("[WEBHOOK] No channel ID found in Google Drive webhook") - return [] - - # Check if this webhook belongs to this connection - if self.webhook_channel_id != channel_id: - print( - f"[WEBHOOK] Channel ID mismatch: expected {self.webhook_channel_id}, got {channel_id}" - ) - return [] - - # Only process certain states (ignore 'sync' which is just a ping) - if resource_state not in ["exists", "not_exists", "change"]: - print(f"[WEBHOOK] Ignoring resource state: {resource_state}") - return [] - - try: - # Extract page token from the resource URI if available - page_token = None - headers = payload.get("_headers", {}) - resource_uri = headers.get("x-goog-resource-uri") - - if resource_uri and "pageToken=" in resource_uri: - # Extract page token from URI like: - # https://www.googleapis.com/drive/v3/changes?alt=json&pageToken=4337807 - import urllib.parse - - parsed = urllib.parse.urlparse(resource_uri) - query_params = urllib.parse.parse_qs(parsed.query) - page_token = query_params.get("pageToken", [None])[0] - - if not page_token: - print("[WEBHOOK] No page token found, cannot identify specific changes") - return [] - - print(f"[WEBHOOK] Getting changes since page token: {page_token}") - - # Get list of changes since the page token - changes = ( + while True: + resp = ( self.service.changes() .list( pageToken=page_token, - 
fields="changes(fileId, file(id, name, mimeType, trashed, parents))", + fields=( + "nextPageToken, newStartPageToken, " + "changes(fileId, file(id, name, mimeType, trashed, parents, " + "shortcutDetails, driveId, modifiedTime, webViewLink))" + ), + **self._drives_flags, ) .execute() ) - affected_files = [] - for change in changes.get("changes", []): - file_info = change.get("file", {}) - file_id = change.get("fileId") + changes = resp.get("changes", []) - if not file_id: + # Filter to our selected scope (files and folder descendants): + selected_ids = {m["id"] for m in self._iter_selected_items()} + for ch in changes: + fid = ch.get("fileId") + file_obj = ch.get("file") or {} + if not fid or not file_obj or file_obj.get("trashed"): continue - # Only include supported file types that aren't trashed - mime_type = file_info.get("mimeType", "") - is_trashed = file_info.get("trashed", False) + # Match scope + if fid not in selected_ids: + # also consider shortcut target + if file_obj.get("mimeType") == "application/vnd.google-apps.shortcut": + tgt = file_obj.get("shortcutDetails", {}).get("targetId") + if tgt and tgt in selected_ids: + pass + else: + continue - if not is_trashed and mime_type in self.SUPPORTED_MIMETYPES: - print( - f"[WEBHOOK] File changed: {file_info.get('name', 'Unknown')} ({file_id})" - ) - affected_files.append(file_id) - elif is_trashed: - print( - f"[WEBHOOK] File deleted/trashed: {file_info.get('name', 'Unknown')} ({file_id})" - ) - # TODO: Handle file deletion (remove from index) - else: - print(f"[WEBHOOK] Ignoring unsupported file type: {mime_type}") + # Download and emit the updated file + resolved = self._resolve_shortcut(file_obj) + try: + blob = self._download_file_bytes(resolved) + except HttpError: + continue - print(f"[WEBHOOK] Found {len(affected_files)} affected supported files") - return affected_files + from datetime import datetime - except HttpError as e: - print(f"Failed to handle webhook: {e}") - return [] + def parse_datetime(dt_str): + if not dt_str: + return None + try: + return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ") + except ValueError: + try: + return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ") + except ValueError: + return None - async def cleanup_subscription(self, subscription_id: str) -> bool: - """Clean up Google Drive subscription for this connection. - - Uses the stored resource_id captured during subscription setup. 
- """ - if not self._authenticated: - return False - - try: - # Google Channels API requires both 'id' (channel) and 'resourceId' - if not self.webhook_resource_id: - raise ValueError( - "Missing resource_id for cleanup; ensure subscription state is persisted" + doc = ConnectorDocument( + id=resolved["id"], + filename=resolved.get("name", ""), + source_url=resolved.get("webViewLink", ""), + created_time=parse_datetime(resolved.get("createdTime")), + modified_time=parse_datetime(resolved.get("modifiedTime")), + mimetype=str(resolved.get("mimeType", "")), + acl=DocumentACL(), # Set appropriate ACL if needed + metadata={"parents": resolved.get("parents"), "driveId": resolved.get("driveId")}, + content=blob, ) - body = {"id": subscription_id, "resourceId": self.webhook_resource_id} + self.emit(doc) - self.service.channels().stop(body=body).execute() + new_page_token = resp.get("nextPageToken") + if new_page_token: + page_token = new_page_token + continue + + # No nextPageToken: advance to newStartPageToken (checkpoint) + new_start = resp.get("newStartPageToken") + if new_start: + self.cfg.changes_page_token = new_start + return new_start + + # Should not happen often + return page_token + + # ------------------------- + # Optional: webhook stubs + # ------------------------- + def build_watch_body(self, webhook_address: str, channel_id: Optional[str] = None) -> Dict[str, Any]: + """ + Prepare the request body for changes.watch if you use webhooks. + """ + return { + "id": channel_id or f"drive-channel-{int(time.time())}", + "type": "web_hook", + "address": webhook_address, + } + + def start_watch(self, webhook_address: str) -> Dict[str, Any]: + """ + Start a webhook watch on changes using the current page token. + Persist the returned resourceId/expiration on your side. + """ + page_token = self.cfg.changes_page_token or self.get_start_page_token() + body = self.build_watch_body(webhook_address) + result = ( + self.service.changes() + .watch(pageToken=page_token, body=body, **self._drives_flags) + .execute() + ) + return result + + def stop_watch(self, channel_id: str, resource_id: str) -> bool: + """ + Stop a previously started webhook watch. 
+ """ + try: + self.service.channels().stop(body={"id": channel_id, "resourceId": resource_id}).execute() return True - except HttpError as e: - print(f"Failed to cleanup subscription: {e}") + except HttpError: return False diff --git a/src/connectors/google_drive/oauth.py b/src/connectors/google_drive/oauth.py index 1c33079f..f23e4796 100644 --- a/src/connectors/google_drive/oauth.py +++ b/src/connectors/google_drive/oauth.py @@ -1,7 +1,6 @@ import os import json -import asyncio -from typing import Dict, Any, Optional +from typing import Optional from google.auth.transport.requests import Request from google.oauth2.credentials import Credentials from google_auth_oauthlib.flow import Flow @@ -25,8 +24,8 @@ class GoogleDriveOAuth: def __init__( self, - client_id: str = None, - client_secret: str = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, token_file: str = "token.json", ): self.client_id = client_id @@ -133,7 +132,7 @@ class GoogleDriveOAuth: if not self.creds: await self.load_credentials() - return self.creds and self.creds.valid + return bool(self.creds and self.creds.valid) def get_service(self): """Get authenticated Google Drive service""" diff --git a/src/services/auth_service.py b/src/services/auth_service.py index e5361233..38372b13 100644 --- a/src/services/auth_service.py +++ b/src/services/auth_service.py @@ -107,11 +107,27 @@ class AuthService: auth_endpoint = oauth_class.AUTH_ENDPOINT token_endpoint = oauth_class.TOKEN_ENDPOINT - # Get client_id from environment variable using connector's env var name - client_id = os.getenv(connector_class.CLIENT_ID_ENV_VAR) - if not client_id: - raise ValueError( - f"{connector_class.CLIENT_ID_ENV_VAR} environment variable not set" + # src/services/auth_service.py + client_key = getattr(connector_class, "CLIENT_ID_ENV_VAR", None) + secret_key = getattr(connector_class, "CLIENT_SECRET_ENV_VAR", None) + + def _assert_env_key(name, val): + if not isinstance(val, str) or not val.strip(): + raise RuntimeError( + f"{connector_class.__name__} misconfigured: {name} must be a non-empty string " + f"(got {val!r}). Define it as a class attribute on the connector." + ) + + _assert_env_key("CLIENT_ID_ENV_VAR", client_key) + _assert_env_key("CLIENT_SECRET_ENV_VAR", secret_key) + + client_id = os.getenv(client_key) + client_secret = os.getenv(secret_key) + + if not client_id or not client_secret: + raise RuntimeError( + f"Missing OAuth env vars for {connector_class.__name__}. " + f"Set {client_key} and {secret_key} in the environment." 
) oauth_config = { From 2dfc8faaac38b7a1d7cf5faeee9a366f1461e256 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Thu, 4 Sep 2025 09:27:18 -0700 Subject: [PATCH 04/32] Add Mike's UI enhancements for picker --- .../src/app/connectors/GoogleDrivePicker.tsx | 117 -------- frontend/src/app/settings/page.tsx | 109 +++++-- .../src/components/google-drive-picker.tsx | 269 ++++++++++++++++++ src/api/connectors.py | 44 ++- src/connectors/google_drive/connector.py | 31 +- src/main.py | 22 +- 6 files changed, 438 insertions(+), 154 deletions(-) delete mode 100644 frontend/src/app/connectors/GoogleDrivePicker.tsx create mode 100644 frontend/src/components/google-drive-picker.tsx diff --git a/frontend/src/app/connectors/GoogleDrivePicker.tsx b/frontend/src/app/connectors/GoogleDrivePicker.tsx deleted file mode 100644 index 7723ca1e..00000000 --- a/frontend/src/app/connectors/GoogleDrivePicker.tsx +++ /dev/null @@ -1,117 +0,0 @@ -"use client" - -import { useCallback, useState } from "react" -import { Button } from "@/components/ui/button" -import { Badge } from "@/components/ui/badge" - -// declare globals to silence TS -declare global { - interface Window { google?: any; gapi?: any } -} - -const loadScript = (src: string) => - new Promise((resolve, reject) => { - if (document.querySelector(`script[src="${src}"]`)) return resolve() - const s = document.createElement("script") - s.src = src - s.async = true - s.onload = () => resolve() - s.onerror = () => reject(new Error(`Failed to load ${src}`)) - document.head.appendChild(s) - }) - -export type DriveSelection = { files: string[]; folders: string[] } - -export function GoogleDrivePicker({ - value, - onChange, - buttonLabel = "Choose in Drive", -}: { - value?: DriveSelection - onChange: (sel: DriveSelection) => void - buttonLabel?: string -}) { - const [loading, setLoading] = useState(false) - - const ensureGoogleApis = useCallback(async () => { - await loadScript("https://accounts.google.com/gsi/client") - await loadScript("https://apis.google.com/js/api.js") - await new Promise((res) => window.gapi?.load("picker", () => res())) - }, []) - - const openPicker = useCallback(async () => { - const clientId = process.env.NEXT_PUBLIC_GOOGLE_CLIENT_ID - const apiKey = process.env.NEXT_PUBLIC_GOOGLE_API_KEY - if (!clientId || !apiKey) { - alert("Google Picker requires NEXT_PUBLIC_GOOGLE_CLIENT_ID and NEXT_PUBLIC_GOOGLE_API_KEY") - return - } - try { - setLoading(true) - await ensureGoogleApis() - const tokenClient = window.google.accounts.oauth2.initTokenClient({ - client_id: clientId, - scope: "https://www.googleapis.com/auth/drive.readonly https://www.googleapis.com/auth/drive.metadata.readonly", - callback: (tokenResp: any) => { - const viewDocs = new window.google.picker.DocsView() - .setIncludeFolders(true) - .setSelectFolderEnabled(true) - - console.log("Picker using clientId:", clientId, "apiKey:", apiKey) - - const picker = new window.google.picker.PickerBuilder() - .enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED) - .setOAuthToken(tokenResp.access_token) - .setDeveloperKey(apiKey) - .addView(viewDocs) - .setCallback((data: any) => { - if (data.action === window.google.picker.Action.PICKED) { - const pickedFiles: string[] = [] - const pickedFolders: string[] = [] - for (const doc of data.docs || []) { - const id = doc.id - const isFolder = doc?.type === "folder" || doc?.mimeType === "application/vnd.google-apps.folder" - if (isFolder) pickedFolders.push(id) - else pickedFiles.push(id) - } - onChange({ files: pickedFiles, folders: 
pickedFolders })
-              }
-            })
-            .build()
-          picker.setVisible(true)
-        },
-      })
-      tokenClient.requestAccessToken()
-    } catch (e) {
-      console.error("Drive Picker error", e)
-      alert("Failed to open Google Drive Picker. See console.")
-    } finally {
-      setLoading(false)
-    }
-  }, [ensureGoogleApis, onChange])
-
-  const filesCount = value?.files?.length ?? 0
-  const foldersCount = value?.folders?.length ?? 0
-
-  return (
-    <div>
-      <div>
-        <Button onClick={openPicker} disabled={loading}>
-          {buttonLabel}
-        </Button>
-        {(filesCount > 0 || foldersCount > 0) && (
-          <Badge>
-            {filesCount} file(s), {foldersCount} folder(s) selected
-          </Badge>
-        )}
-      </div>
-      {(filesCount > 0 || foldersCount > 0) && (
-        <div>
-          {value!.files.slice(0, 6).map((id) => (
-            <Badge key={id}>file:{id}</Badge>
-          ))}
-          {filesCount > 6 && <Badge>+{filesCount - 6} more</Badge>}
-          {value!.folders.slice(0, 6).map((id) => (
-            <Badge key={id}>folder:{id}</Badge>
-          ))}
-          {foldersCount > 6 && <Badge>+{foldersCount - 6} more</Badge>}
-        </div>
-      )}
-    </div>
- ) -} diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index cbc17449..a1eea2a7 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -12,9 +12,17 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react" import { ProtectedRoute } from "@/components/protected-route" import { useTask } from "@/contexts/task-context" import { useAuth } from "@/contexts/auth-context" -import { GoogleDrivePicker, type DriveSelection } from "../connectors/GoogleDrivePicker" +import { GoogleDrivePicker } from "@/components/google-drive-picker" +interface GoogleDriveFile { + id: string + name: string + mimeType: string + webViewLink?: string + iconLink?: string +} + interface Connector { id: string name: string @@ -24,6 +32,7 @@ interface Connector { type: string connectionId?: string access_token?: string + selectedFiles?: GoogleDriveFile[] } interface SyncResult { @@ -54,7 +63,8 @@ function KnowledgeSourcesPage() { const [syncResults, setSyncResults] = useState<{[key: string]: SyncResult | null}>({}) const [maxFiles, setMaxFiles] = useState(10) const [syncAllFiles, setSyncAllFiles] = useState(false) - const [driveSelection, setDriveSelection] = useState({ files: [], folders: [] }) + const [selectedFiles, setSelectedFiles] = useState<{[connectorId: string]: GoogleDriveFile[]}>({}) + const [connectorAccessTokens, setConnectorAccessTokens] = useState<{[connectorId: string]: string}>({}) // Settings state // Note: backend internal Langflow URL is not needed on the frontend @@ -145,6 +155,24 @@ function KnowledgeSourcesPage() { const activeConnection = connections.find((conn: Connection) => conn.is_active) const isConnected = activeConnection !== undefined + // For Google Drive, try to get access token for the picker + if (connectorType === 'google_drive' && activeConnection) { + try { + const tokenResponse = await fetch(`/api/connectors/${connectorType}/token?connection_id=${activeConnection.connection_id}`) + if (tokenResponse.ok) { + const tokenData = await tokenResponse.json() + if (tokenData.access_token) { + setConnectorAccessTokens(prev => ({ + ...prev, + [connectorType]: tokenData.access_token + })) + } + } + } catch (e) { + console.log('Could not fetch access token for Google Drive picker:', e) + } + } + setConnectors(prev => prev.map(c => c.type === connectorType ? { @@ -210,47 +238,71 @@ function KnowledgeSourcesPage() { } } + const handleFileSelection = (connectorId: string, files: GoogleDriveFile[]) => { + setSelectedFiles(prev => ({ + ...prev, + [connectorId]: files + })) + + // Update the connector with selected files + setConnectors(prev => prev.map(c => + c.id === connectorId + ? { ...c, selectedFiles: files } + : c + )) + } + const handleSync = async (connector: Connector) => { if (!connector.connectionId) return - + setIsSyncing(connector.id) setSyncResults(prev => ({ ...prev, [connector.id]: null })) - + try { - const body: any = { + const syncBody: { + connection_id: string; + max_files?: number; + selected_files?: string[]; + } = { connection_id: connector.connectionId, - max_files: syncAllFiles ? 0 : (maxFiles || undefined), + max_files: syncAllFiles ? 
0 : (maxFiles || undefined) } - - if (connector.type === "google-drive") { - body.file_ids = driveSelection.files - body.folder_ids = driveSelection.folders - body.recursive = true // or expose a checkbox if you want + + // Add selected files for Google Drive + if (connector.type === "google_drive" && selectedFiles[connector.id]?.length > 0) { + syncBody.selected_files = selectedFiles[connector.id].map(file => file.id) } - + const response = await fetch(`/api/connectors/${connector.type}/sync`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(body), + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(syncBody), }) - + const result = await response.json() + if (response.status === 201) { const taskId = result.task_id if (taskId) { addTask(taskId) - setSyncResults(prev => ({ - ...prev, - [connector.id]: { processed: 0, total: result.total_files || 0 } + setSyncResults(prev => ({ + ...prev, + [connector.id]: { + processed: 0, + total: result.total_files || 0 + } })) } } else if (response.ok) { setSyncResults(prev => ({ ...prev, [connector.id]: result })) + // Note: Stats will auto-refresh via task completion watcher for async syncs } else { - console.error("Sync failed:", result.error) + console.error('Sync failed:', result.error) } } catch (error) { - console.error("Sync error:", error) + console.error('Sync error:', error) } finally { setIsSyncing(null) } @@ -436,9 +488,16 @@ function KnowledgeSourcesPage() { {connector.status === "connected" ? (
-                        <div>
-                          <GoogleDrivePicker value={driveSelection} onChange={setDriveSelection} />
-                        </div>
+ {/* Google Drive file picker */} + {connector.type === "google_drive" && ( + handleFileSelection(connector.id, files)} + selectedFiles={selectedFiles[connector.id] || []} + isAuthenticated={connector.status === "connected"} + accessToken={connectorAccessTokens[connector.type]} + /> + )} + diff --git a/frontend/src/components/google-drive-picker.tsx b/frontend/src/components/google-drive-picker.tsx new file mode 100644 index 00000000..e3a9555b --- /dev/null +++ b/frontend/src/components/google-drive-picker.tsx @@ -0,0 +1,269 @@ +"use client" + +import { useState, useEffect } from "react" +import { Button } from "@/components/ui/button" +import { Badge } from "@/components/ui/badge" +import { FileText, Folder, X } from "lucide-react" + +interface GoogleDrivePickerProps { + onFileSelected: (files: GoogleDriveFile[]) => void + selectedFiles?: GoogleDriveFile[] + isAuthenticated: boolean + accessToken?: string +} + +interface GoogleDriveFile { + id: string + name: string + mimeType: string + webViewLink?: string + iconLink?: string +} + +interface GoogleAPI { + load: (api: string, options: { callback: () => void; onerror?: () => void }) => void +} + +interface GooglePickerData { + action: string + docs: GooglePickerDocument[] +} + +interface GooglePickerDocument { + [key: string]: string +} + +declare global { + interface Window { + gapi: GoogleAPI + google: { + picker: { + api: { + load: (callback: () => void) => void + } + PickerBuilder: new () => GooglePickerBuilder + ViewId: { + DOCS: string + FOLDERS: string + DOCS_IMAGES_AND_VIDEOS: string + DOCUMENTS: string + PRESENTATIONS: string + SPREADSHEETS: string + } + Feature: { + MULTISELECT_ENABLED: string + NAV_HIDDEN: string + SIMPLE_UPLOAD_ENABLED: string + } + Action: { + PICKED: string + CANCEL: string + } + Document: { + ID: string + NAME: string + MIME_TYPE: string + URL: string + ICON_URL: string + } + } + } + } +} + +interface GooglePickerBuilder { + addView: (view: string) => GooglePickerBuilder + setOAuthToken: (token: string) => GooglePickerBuilder + setCallback: (callback: (data: GooglePickerData) => void) => GooglePickerBuilder + enableFeature: (feature: string) => GooglePickerBuilder + setTitle: (title: string) => GooglePickerBuilder + build: () => GooglePicker +} + +interface GooglePicker { + setVisible: (visible: boolean) => void +} + +export function GoogleDrivePicker({ + onFileSelected, + selectedFiles = [], + isAuthenticated, + accessToken +}: GoogleDrivePickerProps) { + const [isPickerLoaded, setIsPickerLoaded] = useState(false) + const [isPickerOpen, setIsPickerOpen] = useState(false) + + useEffect(() => { + const loadPickerApi = () => { + if (typeof window !== 'undefined' && window.gapi) { + window.gapi.load('picker', { + callback: () => { + setIsPickerLoaded(true) + }, + onerror: () => { + console.error('Failed to load Google Picker API') + } + }) + } + } + + // Load Google API script if not already loaded + if (typeof window !== 'undefined') { + if (!window.gapi) { + const script = document.createElement('script') + script.src = 'https://apis.google.com/js/api.js' + script.async = true + script.defer = true + script.onload = loadPickerApi + script.onerror = () => { + console.error('Failed to load Google API script') + } + document.head.appendChild(script) + + return () => { + if (document.head.contains(script)) { + document.head.removeChild(script) + } + } + } else { + loadPickerApi() + } + } + }, []) + + const openPicker = () => { + if (!isPickerLoaded || !accessToken || !window.google?.picker) { + return + } + + 
+    try {
+      setIsPickerOpen(true)
+
+      const picker = new window.google.picker.PickerBuilder()
+        .addView(window.google.picker.ViewId.DOCS)
+        .addView(window.google.picker.ViewId.FOLDERS)
+        .setOAuthToken(accessToken)
+        .enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED)
+        .setTitle('Select files from Google Drive')
+        .setCallback(pickerCallback)
+        .build()
+
+      picker.setVisible(true)
+    } catch (error) {
+      console.error('Error creating picker:', error)
+      setIsPickerOpen(false)
+    }
+  }
+
+  const pickerCallback = (data: GooglePickerData) => {
+    if (data.action === window.google.picker.Action.PICKED) {
+      const files: GoogleDriveFile[] = data.docs.map((doc: GooglePickerDocument) => ({
+        id: doc[window.google.picker.Document.ID],
+        name: doc[window.google.picker.Document.NAME],
+        mimeType: doc[window.google.picker.Document.MIME_TYPE],
+        webViewLink: doc[window.google.picker.Document.URL],
+        iconLink: doc[window.google.picker.Document.ICON_URL]
+      }))
+
+      onFileSelected(files)
+    }
+
+    setIsPickerOpen(false)
+  }
+
+  const removeFile = (fileId: string) => {
+    const updatedFiles = selectedFiles.filter(file => file.id !== fileId)
+    onFileSelected(updatedFiles)
+  }
+
+  const getFileIcon = (mimeType: string) => {
+    if (mimeType.includes('folder')) {
+      return <Folder />
+    }
+    return <FileText />
+  }
+
+  const getMimeTypeLabel = (mimeType: string) => {
+    const typeMap: { [key: string]: string } = {
+      'application/vnd.google-apps.document': 'Google Doc',
+      'application/vnd.google-apps.spreadsheet': 'Google Sheet',
+      'application/vnd.google-apps.presentation': 'Google Slides',
+      'application/vnd.google-apps.folder': 'Folder',
+      'application/pdf': 'PDF',
+      'text/plain': 'Text',
+      'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Word Doc',
+      'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'PowerPoint'
+    }
+
+    return typeMap[mimeType] || 'Document'
+  }
+
+  if (!isAuthenticated) {
+    return (
+      <div>
+        Please connect to Google Drive first to select specific files.
+      </div>
+    )
+  }
+
+  return (
+    <div>
+      <div>
+        <div>
+          <h4>File Selection</h4>
+          <p>
+            Choose specific files to sync instead of syncing everything
+          </p>
+        </div>
+        <Button onClick={openPicker} disabled={!isPickerLoaded || !accessToken || isPickerOpen}>
+          Select files
+        </Button>
+      </div>
+
+      {selectedFiles.length > 0 && (
+        <div>
+          <p>
+            Selected files ({selectedFiles.length}):
+          </p>
+          <div>
+            {selectedFiles.map((file) => (
+              <div key={file.id}>
+                <div>
+                  {getFileIcon(file.mimeType)}
+                  <span>{file.name}</span>
+                  <Badge>{getMimeTypeLabel(file.mimeType)}</Badge>
+                </div>
+                <Button onClick={() => removeFile(file.id)}>
+                  <X />
+                </Button>
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+    </div>
+ ) +} diff --git a/src/api/connectors.py b/src/api/connectors.py index 87f21b4b..43b9400d 100644 --- a/src/api/connectors.py +++ b/src/api/connectors.py @@ -111,6 +111,8 @@ async def connector_status(request: Request, connector_service, session_manager) async def connector_webhook(request: Request, connector_service, session_manager): """Handle webhook notifications from any connector type""" connector_type = request.path_params.get("connector_type") + if connector_type is None: + connector_type = "unknown" # Handle webhook validation (connector-specific) temp_config = {"token_file": "temp.json"} @@ -118,7 +120,7 @@ async def connector_webhook(request: Request, connector_service, session_manager temp_connection = ConnectionConfig( connection_id="temp", - connector_type=connector_type, + connector_type=str(connector_type), name="temp", config=temp_config, ) @@ -186,7 +188,6 @@ async def connector_webhook(request: Request, connector_service, session_manager ) # Process webhook for the specific connection - results = [] try: # Get the connector instance connector = await connector_service._get_connector(connection.connection_id) @@ -272,3 +273,42 @@ async def connector_webhook(request: Request, connector_service, session_manager return JSONResponse( {"error": f"Webhook processing failed: {str(e)}"}, status_code=500 ) + +async def connector_token(request: Request, connector_service, session_manager): + """Get access token for connector API calls (e.g., Google Picker)""" + connector_type = request.path_params.get("connector_type") + connection_id = request.query_params.get("connection_id") + + if not connection_id: + return JSONResponse({"error": "connection_id is required"}, status_code=400) + + user = request.state.user + + try: + # Get the connection and verify it belongs to the user + connection = await connector_service.connection_manager.get_connection(connection_id) + if not connection or connection.user_id != user.user_id: + return JSONResponse({"error": "Connection not found"}, status_code=404) + + # Get the connector instance + connector = await connector_service._get_connector(connection_id) + if not connector: + return JSONResponse({"error": "Connector not available"}, status_code=404) + + # For Google Drive, get the access token + if connector_type == "google_drive" and hasattr(connector, 'oauth'): + await connector.oauth.load_credentials() + if connector.oauth.creds and connector.oauth.creds.valid: + return JSONResponse({ + "access_token": connector.oauth.creds.token, + "expires_in": (connector.oauth.creds.expiry.timestamp() - + __import__('time').time()) if connector.oauth.creds.expiry else None + }) + else: + return JSONResponse({"error": "Invalid or expired credentials"}, status_code=401) + + return JSONResponse({"error": "Token not available for this connector type"}, status_code=400) + + except Exception as e: + print(f"Error getting connector token: {e}") + return JSONResponse({"error": str(e)}, status_code=500) diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index 5a8099e0..b392f8cc 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -69,6 +69,10 @@ class GoogleDriveConnector(BaseConnector): CLIENT_ID_ENV_VAR: str = "GOOGLE_OAUTH_CLIENT_ID" CLIENT_SECRET_ENV_VAR: str = "GOOGLE_OAUTH_CLIENT_SECRET" + # Supported alias keys coming from various frontends / pickers + _FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files") + _FOLDER_ID_ALIASES = ("folder_ids", 
"selected_folder_ids", "selected_folders") + def log(self, message: str) -> None: print(message) @@ -106,12 +110,24 @@ class GoogleDriveConnector(BaseConnector): f"Provide config['client_secret'] or set {self.CLIENT_SECRET_ENV_VAR}." ) + # Normalize incoming IDs from any of the supported alias keys + def _first_present_list(cfg: Dict[str, Any], keys: Iterable[str]) -> Optional[List[str]]: + for k in keys: + v = cfg.get(k) + if v: # accept non-empty list + return list(v) + return None + + normalized_file_ids = _first_present_list(config, self._FILE_ID_ALIASES) + normalized_folder_ids = _first_present_list(config, self._FOLDER_ID_ALIASES) + self.cfg = GoogleDriveConfig( client_id=client_id, client_secret=client_secret, token_file=token_file, - file_ids=config.get("file_ids") or config.get("selected_file_ids"), - folder_ids=config.get("folder_ids") or config.get("selected_folder_ids"), + # Accept "selected_files" and "selected_folders" used by the Drive Picker flow + file_ids=normalized_file_ids, + folder_ids=normalized_folder_ids, recursive=bool(config.get("recursive", True)), drive_id=config.get("drive_id"), corpora=config.get("corpora"), @@ -417,7 +433,11 @@ class GoogleDriveConnector(BaseConnector): self.log(f"GoogleDriveConnector.authenticate failed: {e}") return False - async def list_files(self, page_token: Optional[str] = None, **kwargs) -> Dict[str, Any]: + async def list_files( + self, + page_token: Optional[str] = None, + **kwargs + ) -> Dict[str, Any]: """ List files in the currently selected scope (file_ids/folder_ids/recursive). Returns a dict with 'files' and 'next_page_token'. @@ -429,6 +449,11 @@ class GoogleDriveConnector(BaseConnector): try: items = self._iter_selected_items() + # Optionally honor a request-scoped max_files (e.g., from your API payload) + max_files = kwargs.get("max_files") + if isinstance(max_files, int) and max_files > 0: + items = items[:max_files] + # Simplest: ignore page_token and just dump all # If you want real pagination, slice items here if page_token: diff --git a/src/main.py b/src/main.py index 611d9885..ebe89149 100644 --- a/src/main.py +++ b/src/main.py @@ -6,18 +6,12 @@ import subprocess from functools import partial from starlette.applications import Starlette from starlette.routing import Route - -# Set multiprocessing start method to 'spawn' for CUDA compatibility -multiprocessing.set_start_method("spawn", force=True) - -# Create process pool FIRST, before any torch/CUDA imports from utils.process_pool import process_pool import torch # Configuration and setup from config.settings import clients, INDEX_NAME, INDEX_BODY, SESSION_SECRET -from utils.gpu_detection import detect_gpu_devices # Services from services.document_service import DocumentService @@ -46,6 +40,9 @@ from api import ( settings, ) +# Set multiprocessing start method to 'spawn' for CUDA compatibility +multiprocessing.set_start_method("spawn", force=True) + print("CUDA available:", torch.cuda.is_available()) print("CUDA version PyTorch was built with:", torch.version.cuda) @@ -240,7 +237,7 @@ async def initialize_services(): except Exception as e: print(f"[WARNING] Failed to load persisted connections on startup: {e}") else: - print(f"[CONNECTORS] Skipping connection loading in no-auth mode") + print("[CONNECTORS] Skipping connection loading in no-auth mode") return { "document_service": document_service, @@ -586,6 +583,17 @@ async def create_app(): ), methods=["GET"], ), + Route( + "/connectors/{connector_type}/token", + require_auth(services["session_manager"])( + partial( 
+ connectors.connector_token, + connector_service=services["connector_service"], + session_manager=services["session_manager"], + ) + ), + methods=["GET"], + ), Route( "/connectors/{connector_type}/webhook", partial( From da55c9e6073b48d8fd3e371b21ca247bb577c3bd Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Thu, 4 Sep 2025 09:41:56 -0700 Subject: [PATCH 05/32] Update main.py --- src/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 257a2865..02e114f5 100644 --- a/src/main.py +++ b/src/main.py @@ -56,8 +56,8 @@ from api import ( # Set multiprocessing start method to 'spawn' for CUDA compatibility multiprocessing.set_start_method("spawn", force=True) -logger.info("CUDA available:", torch.cuda.is_available()) -logger.info("CUDA version PyTorch was built with:", torch.version.cuda) +logger.info("CUDA available", cuda_available=torch.cuda.is_available()) +logger.info("CUDA version PyTorch was built with", cuda_version=torch.version.cuda) async def wait_for_opensearch(): From 3aad42390e5b683ab5860a938a85db308111f3f6 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Thu, 4 Sep 2025 10:02:07 -0700 Subject: [PATCH 06/32] Get sync sort of working... --- src/api/connectors.py | 8 ++++++- src/connectors/service.py | 44 ++++----------------------------------- 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/src/api/connectors.py b/src/api/connectors.py index 45369241..426c32ed 100644 --- a/src/api/connectors.py +++ b/src/api/connectors.py @@ -47,7 +47,13 @@ async def connector_sync(request: Request, connector_service, session_manager): for connection in active_connections: logger.debug("About to call sync_connector_files for connection", connection_id=connection.connection_id) task_id = await connector_service.sync_connector_files( - connection.connection_id, user.user_id, max_files, jwt_token=jwt_token + connection.connection_id, + user.user_id, + max_files, + jwt_token=jwt_token, + # NEW: thread picker selections through + selected_files=data.get("selected_files"), + selected_folders=data.get("selected_folders"), ) task_ids.append(task_id) logger.debug("Got task ID", task_id=task_id) diff --git a/src/connectors/service.py b/src/connectors/service.py index c2225f5c..e69b1025 100644 --- a/src/connectors/service.py +++ b/src/connectors/service.py @@ -194,6 +194,8 @@ class ConnectorService: user_id: str, max_files: int = None, jwt_token: str = None, + selected_files: List[str] = None, + selected_folders: List[str] = None, ) -> str: """Sync files from a connector connection using existing task tracking system""" if not self.task_service: @@ -216,41 +218,6 @@ class ConnectorService: if not connector.is_authenticated: raise ValueError(f"Connection '{connection_id}' not authenticated") - # Collect files to process (limited by max_files) - files_to_process = [] - page_token = None - - # Calculate page size to minimize API calls - page_size = min(max_files or 100, 1000) if max_files else 100 - - while True: - # List files from connector with limit - print( - f"[DEBUG] Calling list_files with page_size={page_size}, page_token={page_token}" - ) - file_list = await connector.list_files(page_token, limit=page_size) - print(f"[DEBUG] Got {len(file_list.get('files', []))} files") - files = file_list["files"] - - if not files: - break - - for file_info in files: - if max_files and len(files_to_process) >= max_files: - break - files_to_process.append(file_info) - - # Stop if we have enough files or no more pages - if (max_files and 
len(files_to_process) >= max_files) or not file_list.get( - "nextPageToken" - ): - break - - page_token = file_list.get("nextPageToken") - - if not files_to_process: - raise ValueError("No files found to sync") - # Get user information user = self.session_manager.get_user(user_id) if self.session_manager else None owner_name = user.name if user else None @@ -262,19 +229,16 @@ class ConnectorService: processor = ConnectorFileProcessor( self, connection_id, - files_to_process, + selected_files, user_id, jwt_token=jwt_token, owner_name=owner_name, owner_email=owner_email, ) - # Use file IDs as items (no more fake file paths!) - file_ids = [file_info["id"] for file_info in files_to_process] - # Create custom task using TaskService task_id = await self.task_service.create_custom_task( - user_id, file_ids, processor + user_id, selected_files, processor ) return task_id From 895ffc1a171ccd2bbcd0a749203b64622afe2f72 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Thu, 4 Sep 2025 10:03:26 -0700 Subject: [PATCH 07/32] Update service.py --- src/connectors/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/connectors/service.py b/src/connectors/service.py index e69b1025..a760d976 100644 --- a/src/connectors/service.py +++ b/src/connectors/service.py @@ -229,7 +229,7 @@ class ConnectorService: processor = ConnectorFileProcessor( self, connection_id, - selected_files, + selected_files or [], user_id, jwt_token=jwt_token, owner_name=owner_name, From a808f0a3f582e9d2b3da0e5a715346602491f960 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Thu, 4 Sep 2025 12:06:03 -0500 Subject: [PATCH 08/32] fix google icon --- src/connectors/google_drive/connector.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index b392f8cc..784be95e 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -69,6 +69,11 @@ class GoogleDriveConnector(BaseConnector): CLIENT_ID_ENV_VAR: str = "GOOGLE_OAUTH_CLIENT_ID" CLIENT_SECRET_ENV_VAR: str = "GOOGLE_OAUTH_CLIENT_SECRET" + # Connector metadata + CONNECTOR_NAME = "Google Drive" + CONNECTOR_DESCRIPTION = "Connect your Google Drive to automatically sync documents" + CONNECTOR_ICON = "google-drive" + # Supported alias keys coming from various frontends / pickers _FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files") _FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders") From e15321e9fc434a28cf6abb2ced0b1b07d972de37 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Thu, 4 Sep 2025 10:14:57 -0700 Subject: [PATCH 09/32] try to get sync working --- docker-compose.yml | 27 ++++++++++++------------ frontend/package-lock.json | 12 ----------- src/api/connectors.py | 3 +++ src/connectors/google_drive/connector.py | 25 +++++++++++++++------- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8e2fdee2..252088ac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,11 +5,9 @@ services: #context: . 
#dockerfile: Dockerfile container_name: os - depends_on: - - openrag-backend environment: - discovery.type=single-node - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD} + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD:-admin123} # Run security setup in background after OpenSearch starts command: > bash -c " @@ -34,7 +32,7 @@ services: environment: OPENSEARCH_HOSTS: '["https://opensearch:9200"]' OPENSEARCH_USERNAME: "admin" - OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD} + OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin123} ports: - "5601:5601" @@ -45,17 +43,18 @@ services: #dockerfile: Dockerfile.backend container_name: openrag-backend depends_on: + - opensearch - langflow environment: - OPENSEARCH_HOST=opensearch - LANGFLOW_URL=http://langflow:7860 - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL} - - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} - - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD} + - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER:-admin} + - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD:-admin123} - FLOW_ID=${FLOW_ID} - OPENSEARCH_PORT=9200 - OPENSEARCH_USERNAME=admin - - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} + - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD:-admin123} - OPENAI_API_KEY=${OPENAI_API_KEY} - NVIDIA_DRIVER_CAPABILITIES=compute,utility - NVIDIA_VISIBLE_DEVICES=all @@ -69,6 +68,8 @@ services: volumes: - ./documents:/app/documents:Z - ./keys:/app/keys:Z + ports: + - "8000:8000" gpus: all openrag-frontend: @@ -87,7 +88,7 @@ services: langflow: volumes: - ./flows:/app/flows:Z - image: phact/langflow:responses + image: langflowai/langflow:latest container_name: langflow ports: - "7860:7860" @@ -99,8 +100,8 @@ services: - OPENRAG-QUERY-FILTER="{}" - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER - LANGFLOW_LOG_LEVEL=DEBUG - - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN} - - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} - - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD} - - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE} - - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI} + - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN:-true} + - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER:-admin} + - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD:-admin123} + - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE:-true} + - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI:-true} diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 103dd7aa..5d7c9750 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -5402,18 +5402,6 @@ "@pkgjs/parseargs": "^0.11.0" } }, - "node_modules/jiti": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz", - "integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==", - "dev": true, - "license": "MIT", - "optional": true, - "peer": true, - "bin": { - "jiti": "lib/jiti-cli.mjs" - } - }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", diff --git a/src/api/connectors.py b/src/api/connectors.py index 426c32ed..8594075c 100644 --- a/src/api/connectors.py +++ b/src/api/connectors.py @@ -23,6 +23,9 @@ async def connector_sync(request: Request, connector_service, session_manager): data = await request.json() max_files = data.get("max_files") + if not data.get("selected_files"): + return JSONResponse({"error": "selected_files is required"}, status_code=400) + 
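With this validation in place, callers must pass the picker selection explicitly. A hedged sketch of the request shape, assuming the backend is reachable directly on port 8000 and omitting auth headers; field names follow this diff, the IDs are placeholders:

```python
# Hedged sketch of a sync request after this change; field names come from the
# diff above, values are placeholders, and auth headers are omitted for brevity.
import requests

resp = requests.post(
    "http://localhost:8000/connectors/google_drive/sync",
    json={
        "connection_id": "conn-123",            # placeholder
        "max_files": 10,
        "selected_files": ["drive-file-id-1"],  # now required (else HTTP 400)
        "selected_folders": [],                 # optional; threaded through to the connector
    },
)
print(resp.status_code, resp.json())
```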
try: logger.debug("Starting connector sync", connector_type=connector_type, max_files=max_files) diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index 06c294a6..07145fcd 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -168,11 +168,18 @@ class GoogleDriveConnector(BaseConnector): # Helpers # ------------------------- @property - def _drives_flags(self) -> Dict[str, Any]: + def _drives_get_flags(self) -> Dict[str, Any]: """ - Common flags for ALL Drive calls to ensure Shared Drives are included. + Flags valid for GET-like calls (files.get, changes.getStartPageToken). """ - return dict(supportsAllDrives=True, includeItemsFromAllDrives=True) + return {"supportsAllDrives": True} + + @property + def _drives_list_flags(self) -> Dict[str, Any]: + """ + Flags valid for LIST-like calls (files.list, changes.list). + """ + return {"supportsAllDrives": True, "includeItemsFromAllDrives": True} def _pick_corpora_args(self) -> Dict[str, Any]: """ @@ -241,7 +248,7 @@ class GoogleDriveConnector(BaseConnector): "id, name, mimeType, modifiedTime, createdTime, size, " "webViewLink, parents, shortcutDetails, driveId)" ), - **self._drives_flags, + **self._drives_list_flags, **self._pick_corpora_args(), ) .execute() @@ -292,7 +299,7 @@ class GoogleDriveConnector(BaseConnector): "id, name, mimeType, modifiedTime, createdTime, size, " "webViewLink, parents, shortcutDetails, driveId" ), - **self._drives_flags, + **self._drives_get_flags, ) .execute() ) @@ -396,9 +403,10 @@ class GoogleDriveConnector(BaseConnector): # default fallback if not overridden if not export_mime: export_mime = "application/pdf" + # NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives request = self.service.files().export_media(fileId=file_id, mimeType=export_mime) else: - # Binary download + # Binary download (get_media also doesn't accept the Drive flags) request = self.service.files().get_media(fileId=file_id) fh = io.BytesIO() @@ -846,7 +854,8 @@ class GoogleDriveConnector(BaseConnector): # Changes API (polling or webhook-backed) # ------------------------- def get_start_page_token(self) -> str: - resp = self.service.changes().getStartPageToken(**self._drives_flags).execute() + # getStartPageToken accepts supportsAllDrives (not includeItemsFromAllDrives) + resp = self.service.changes().getStartPageToken(**self._drives_get_flags).execute() return resp["startPageToken"] def poll_changes_and_sync(self) -> Optional[str]: @@ -867,7 +876,7 @@ class GoogleDriveConnector(BaseConnector): "changes(fileId, file(id, name, mimeType, trashed, parents, " "shortcutDetails, driveId, modifiedTime, webViewLink))" ), - **self._drives_flags, + **self._drives_list_flags, ) .execute() ) From 6dcb65debd6a61c5d0aef5aca413a0caa71e4b2f Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Thu, 4 Sep 2025 15:36:41 -0300 Subject: [PATCH 10/32] =?UTF-8?q?=F0=9F=93=9D=20(frontend):=20Add=20new=20?= =?UTF-8?q?function=20'refreshConversationsSilent'=20to=20update=20data=20?= =?UTF-8?q?without=20loading=20states=20=F0=9F=9A=80=20(frontend):=20Imple?= =?UTF-8?q?ment=20support=20for=20process.env.PORT=20to=20run=20app=20on?= =?UTF-8?q?=20a=20configurable=20port=20=F0=9F=94=A7=20(frontend):=20Chang?= =?UTF-8?q?e=20port=20variable=20case=20from=20lowercase=20'port'=20to=20u?= =?UTF-8?q?ppercase=20'PORT'=20for=20better=20semantics=20=F0=9F=93=9D=20(?= =?UTF-8?q?frontend):=20Add=20comments=20to=20clarify=20the=20purpose=20of?= 
 loading conversation data only when user explicitly selects a conversation
 📝 (frontend): Add comments to explain the logic for loading conversation data based on certain conditions
 📝 (frontend): Add comments to describe the purpose of handling new conversation creation and resetting messages
 📝 (frontend): Add comments to explain the logic for loading conversation data when conversationData changes
 📝 (frontend): Add comments to clarify the purpose of loading conversations from the backend
 📝 (frontend): Add comments to describe the logic for silent refresh to update data without loading states
 📝 (frontend): Add comments to explain the purpose of starting a new conversation and creating a placeholder conversation
 📝 (frontend): Add comments to clarify the logic for forking from a response and starting a new conversation
 📝 (frontend): Add comments to describe the purpose of adding a conversation document and clearing conversation documents
 📝 (frontend): Add comments to explain the logic for using a timeout to debounce multiple rapid refresh calls
 📝 (frontend): Add comments to clarify the purpose of cleaning up timeout on unmount
 📝 (frontend): Add comments to describe the logic for handling new conversation creation and resetting state
 📝 (frontend): Add comments to explain the logic for forking from a response and starting a new conversation
 📝 (frontend): Add comments to clarify the purpose of using useMemo for optimizing performance in ChatProvider
 📝 (frontend): Add comments to describe the logic for using useMemo in the ChatProvider component
 📝 (frontend): Add comments to explain the purpose of the useChat custom hook
 📝 (frontend): Add comments to clarify the error message when useChat is not used within a ChatProvider
 📝 (services): Update ChatService to fetch Langflow history with flow_id parameter for better control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 frontend/src/app/chat/page.tsx         | 110 ++++----
 frontend/src/components/navigation.tsx | 230 +++++++++++++++++
 frontend/src/contexts/chat-context.tsx | 333 +++++++++++++++----------
 src/services/chat_service.py           |   2 +-
 4 files changed, 495 insertions(+), 180 deletions(-)
 create mode 100644
frontend/src/components/navigation.tsx diff --git a/frontend/src/app/chat/page.tsx b/frontend/src/app/chat/page.tsx index 100228ea..19703214 100644 --- a/frontend/src/app/chat/page.tsx +++ b/frontend/src/app/chat/page.tsx @@ -91,8 +91,10 @@ function ChatPage() { addConversationDoc, forkFromResponse, refreshConversations, + refreshConversationsSilent, previousResponseIds, setPreviousResponseIds, + placeholderConversation, } = useChat(); const [messages, setMessages] = useState([ { @@ -133,6 +135,7 @@ function ChatPage() { const dropdownRef = useRef(null); const streamAbortRef = useRef(null); const streamIdRef = useRef(0); + const lastLoadedConversationRef = useRef(null); const { addTask, isMenuOpen } = useTask(); const { selectedFilter, parsedFilterData, isPanelOpen, setSelectedFilter } = useKnowledgeFilter(); @@ -241,11 +244,16 @@ function ChatPage() { ...prev, [endpoint]: result.response_id, })); + + // If this is a new conversation (no currentConversationId), set it now + if (!currentConversationId) { + setCurrentConversationId(result.response_id); + refreshConversations(true); + } else { + // For existing conversations, do a silent refresh to keep backend in sync + refreshConversationsSilent(); + } } - // Sidebar should show this conversation after upload creates it - try { - refreshConversations(); - } catch {} } else { throw new Error(`Upload failed: ${response.status}`); } @@ -406,6 +414,7 @@ function ChatPage() { setExpandedFunctionCalls(new Set()); setIsFilterHighlighted(false); setLoading(false); + lastLoadedConversationRef.current = null; }; const handleFocusInput = () => { @@ -420,25 +429,19 @@ function ChatPage() { }; }, []); - // Load conversation when conversationData changes + // Load conversation only when user explicitly selects a conversation useEffect(() => { - const now = Date.now(); - - // Don't reset messages if user is in the middle of an interaction (like forking) - if (isUserInteracting || isForkingInProgress) { - console.log( - "Skipping conversation load due to user interaction or forking" - ); - return; - } - - // Don't reload if we just forked recently (within 1 second) - if (now - lastForkTimestamp < 1000) { - console.log("Skipping conversation load - recent fork detected"); - return; - } - - if (conversationData && conversationData.messages) { + // Only load conversation data when: + // 1. conversationData exists AND + // 2. It's different from the last loaded conversation AND + // 3. 
User is not in the middle of an interaction + if ( + conversationData && + conversationData.messages && + lastLoadedConversationRef.current !== conversationData.response_id && + !isUserInteracting && + !isForkingInProgress + ) { console.log( "Loading conversation with", conversationData.messages.length, @@ -460,6 +463,7 @@ function ChatPage() { ); setMessages(convertedMessages); + lastLoadedConversationRef.current = conversationData.response_id; // Set the previous response ID for this conversation setPreviousResponseIds((prev) => ({ @@ -467,14 +471,16 @@ function ChatPage() { [conversationData.endpoint]: conversationData.response_id, })); } - // Reset messages when starting a new conversation (but not during forking) - else if ( - currentConversationId === null && - !isUserInteracting && - !isForkingInProgress && - now - lastForkTimestamp > 1000 - ) { - console.log("Resetting to default message for new conversation"); + }, [ + conversationData, + isUserInteracting, + isForkingInProgress, + ]); + + // Handle new conversation creation - only reset messages when placeholderConversation is set + useEffect(() => { + if (placeholderConversation && currentConversationId === null) { + console.log("Starting new conversation"); setMessages([ { role: "assistant", @@ -482,15 +488,9 @@ function ChatPage() { timestamp: new Date(), }, ]); + lastLoadedConversationRef.current = null; } - }, [ - conversationData, - currentConversationId, - isUserInteracting, - isForkingInProgress, - lastForkTimestamp, - setPreviousResponseIds, - ]); + }, [placeholderConversation, currentConversationId]); // Listen for file upload events from navigation useEffect(() => { @@ -1280,14 +1280,16 @@ function ChatPage() { ...prev, [endpoint]: newResponseId, })); + + // If this is a new conversation (no currentConversationId), set it now + if (!currentConversationId) { + setCurrentConversationId(newResponseId); + refreshConversations(true); + } else { + // For existing conversations, do a silent refresh to keep backend in sync + refreshConversationsSilent(); + } } - - // Trigger sidebar refresh to include this conversation (with small delay to ensure backend has processed) - setTimeout(() => { - try { - refreshConversations(); - } catch {} - }, 100); } catch (error) { // If stream was aborted (e.g., starting new conversation), do not append errors or final messages if (streamAbortRef.current?.signal.aborted) { @@ -1390,13 +1392,16 @@ function ChatPage() { ...prev, [endpoint]: result.response_id, })); + + // If this is a new conversation (no currentConversationId), set it now + if (!currentConversationId) { + setCurrentConversationId(result.response_id); + refreshConversations(true); + } else { + // For existing conversations, do a silent refresh to keep backend in sync + refreshConversationsSilent(); + } } - // Trigger sidebar refresh to include/update this conversation (with small delay to ensure backend has processed) - setTimeout(() => { - try { - refreshConversations(); - } catch {} - }, 100); } else { console.error("Chat failed:", result.error); const errorMessage: Message = { @@ -2013,9 +2018,6 @@ function ChatPage() { // Clear filter highlight when user starts typing if (isFilterHighlighted) { setIsFilterHighlighted(false); - try { - refreshConversations(); - } catch {} } // Find if there's an @ at the start of the last word diff --git a/frontend/src/components/navigation.tsx b/frontend/src/components/navigation.tsx new file mode 100644 index 00000000..30ac9a4b --- /dev/null +++ b/frontend/src/components/navigation.tsx @@ 
-0,0 +1,230 @@ +"use client" + +import { useState, useEffect, useRef } from "react" +import { useRouter, usePathname } from "next/navigation" +import { Button } from "@/components/ui/button" +import { Plus, MessageSquare, Database, Settings, GitBranch } from "lucide-react" +import { useChat } from "@/contexts/chat-context" +import { useAuth } from "@/contexts/auth-context" + +interface Conversation { + id: string + title: string + endpoint: string + last_activity: string + created_at: string + response_id: string + messages?: Array<{ + role: string + content: string + timestamp?: string + response_id?: string + }> +} + +export function Navigation() { + const router = useRouter() + const pathname = usePathname() + const { user } = useAuth() + const { + refreshTrigger, + refreshTriggerSilent, + loadConversation, + startNewConversation, + currentConversationId, + placeholderConversation, + } = useChat() + + const [conversations, setConversations] = useState([]) + const [loading, setLoading] = useState(false) + + // Load conversations from backend + const loadConversations = async () => { + if (!user) return + + try { + setLoading(true) + const response = await fetch("/api/conversations") + if (response.ok) { + const data = await response.json() + setConversations(data.conversations || []) + } + } catch (error) { + console.error("Failed to load conversations:", error) + } finally { + setLoading(false) + } + } + + // Load conversations on mount and when refreshTrigger changes (with loading state) + useEffect(() => { + loadConversations() + }, [refreshTrigger, user]) + + // Silent refresh - update data without loading state + useEffect(() => { + const loadSilent = async () => { + if (!user) return + + try { + // Don't show loading state for silent refresh + const response = await fetch("/api/conversations") + if (response.ok) { + const data = await response.json() + setConversations(data.conversations || []) + } + } catch (error) { + console.error("Silent conversation refresh failed:", error) + } + } + + // Only do silent refresh if we have a silent trigger change (not initial load) + if (refreshTriggerSilent > 0) { + loadSilent() + } + }, [refreshTriggerSilent, user]) + + const handleNewConversation = () => { + startNewConversation() + // Dispatch custom event to notify chat page + window.dispatchEvent(new CustomEvent('newConversation')) + router.push('/chat') + } + + const handleConversationClick = async (conversation: Conversation) => { + try { + // Load full conversation data from backend + const response = await fetch(`/api/conversations/${conversation.response_id}`) + if (response.ok) { + const fullConversation = await response.json() + loadConversation(fullConversation) + router.push('/chat') + } + } catch (error) { + console.error("Failed to load conversation:", error) + } + } + + const formatRelativeTime = (timestamp: string) => { + const date = new Date(timestamp) + const now = new Date() + const diffMs = now.getTime() - date.getTime() + const diffHours = Math.floor(diffMs / (1000 * 60 * 60)) + const diffDays = Math.floor(diffHours / 24) + + if (diffDays > 0) { + return `${diffDays}d ago` + } else if (diffHours > 0) { + return `${diffHours}h ago` + } else { + return 'Just now' + } + } + + return ( + + ) +} \ No newline at end of file diff --git a/frontend/src/contexts/chat-context.tsx b/frontend/src/contexts/chat-context.tsx index cc734d99..db79e0d3 100644 --- a/frontend/src/contexts/chat-context.tsx +++ b/frontend/src/contexts/chat-context.tsx @@ -1,161 +1,244 @@ -"use client" +"use 
client"; -import React, { createContext, useContext, useState, ReactNode } from 'react' +import { + createContext, + ReactNode, + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from "react"; -export type EndpointType = 'chat' | 'langflow' +export type EndpointType = "chat" | "langflow"; interface ConversationDocument { - filename: string - uploadTime: Date + filename: string; + uploadTime: Date; } interface ConversationMessage { - role: string - content: string - timestamp?: string - response_id?: string + role: string; + content: string; + timestamp?: string; + response_id?: string; } interface ConversationData { - messages: ConversationMessage[] - endpoint: EndpointType - response_id: string - title: string - [key: string]: unknown + messages: ConversationMessage[]; + endpoint: EndpointType; + response_id: string; + title: string; + [key: string]: unknown; } interface ChatContextType { - endpoint: EndpointType - setEndpoint: (endpoint: EndpointType) => void - currentConversationId: string | null - setCurrentConversationId: (id: string | null) => void + endpoint: EndpointType; + setEndpoint: (endpoint: EndpointType) => void; + currentConversationId: string | null; + setCurrentConversationId: (id: string | null) => void; previousResponseIds: { - chat: string | null - langflow: string | null - } - setPreviousResponseIds: (ids: { chat: string | null; langflow: string | null } | ((prev: { chat: string | null; langflow: string | null }) => { chat: string | null; langflow: string | null })) => void - refreshConversations: () => void - refreshTrigger: number - loadConversation: (conversation: ConversationData) => void - startNewConversation: () => void - conversationData: ConversationData | null - forkFromResponse: (responseId: string) => void - conversationDocs: ConversationDocument[] - addConversationDoc: (filename: string) => void - clearConversationDocs: () => void - placeholderConversation: ConversationData | null - setPlaceholderConversation: (conversation: ConversationData | null) => void + chat: string | null; + langflow: string | null; + }; + setPreviousResponseIds: ( + ids: + | { chat: string | null; langflow: string | null } + | ((prev: { chat: string | null; langflow: string | null }) => { + chat: string | null; + langflow: string | null; + }) + ) => void; + refreshConversations: (force?: boolean) => void; + refreshConversationsSilent: () => Promise; + refreshTrigger: number; + refreshTriggerSilent: number; + loadConversation: (conversation: ConversationData) => void; + startNewConversation: () => void; + conversationData: ConversationData | null; + forkFromResponse: (responseId: string) => void; + conversationDocs: ConversationDocument[]; + addConversationDoc: (filename: string) => void; + clearConversationDocs: () => void; + placeholderConversation: ConversationData | null; + setPlaceholderConversation: (conversation: ConversationData | null) => void; } -const ChatContext = createContext(undefined) +const ChatContext = createContext(undefined); interface ChatProviderProps { - children: ReactNode + children: ReactNode; } export function ChatProvider({ children }: ChatProviderProps) { - const [endpoint, setEndpoint] = useState('langflow') - const [currentConversationId, setCurrentConversationId] = useState(null) + const [endpoint, setEndpoint] = useState("langflow"); + const [currentConversationId, setCurrentConversationId] = useState< + string | null + >(null); const [previousResponseIds, setPreviousResponseIds] = useState<{ - chat: string | null - 
langflow: string | null - }>({ chat: null, langflow: null }) - const [refreshTrigger, setRefreshTrigger] = useState(0) - const [conversationData, setConversationData] = useState(null) - const [conversationDocs, setConversationDocs] = useState([]) - const [placeholderConversation, setPlaceholderConversation] = useState(null) + chat: string | null; + langflow: string | null; + }>({ chat: null, langflow: null }); + const [refreshTrigger, setRefreshTrigger] = useState(0); + const [refreshTriggerSilent, setRefreshTriggerSilent] = useState(0); + const [conversationData, setConversationData] = + useState(null); + const [conversationDocs, setConversationDocs] = useState< + ConversationDocument[] + >([]); + const [placeholderConversation, setPlaceholderConversation] = + useState(null); - const refreshConversations = () => { - setRefreshTrigger(prev => prev + 1) - } + // Debounce refresh requests to prevent excessive reloads + const refreshTimeoutRef = useRef(null); - const loadConversation = (conversation: ConversationData) => { - setCurrentConversationId(conversation.response_id) - setEndpoint(conversation.endpoint) - // Store the full conversation data for the chat page to use - // We'll pass it through a ref or state that the chat page can access - setConversationData(conversation) - // Clear placeholder when loading a real conversation - setPlaceholderConversation(null) - } - - const startNewConversation = () => { - // Create a temporary placeholder conversation - const placeholderConversation: ConversationData = { - response_id: 'new-conversation-' + Date.now(), - title: 'New conversation', - endpoint: endpoint, - messages: [{ - role: 'assistant', - content: 'How can I assist?', - timestamp: new Date().toISOString() - }], - created_at: new Date().toISOString(), - last_activity: new Date().toISOString() + const refreshConversations = useCallback((force = false) => { + if (force) { + // Immediate refresh for important updates like new conversations + setRefreshTrigger((prev) => prev + 1); + return; } - - setCurrentConversationId(null) - setPreviousResponseIds({ chat: null, langflow: null }) - setConversationData(null) - setConversationDocs([]) - setPlaceholderConversation(placeholderConversation) - // Force a refresh to ensure sidebar shows correct state - setRefreshTrigger(prev => prev + 1) - } - const addConversationDoc = (filename: string) => { - setConversationDocs(prev => [...prev, { filename, uploadTime: new Date() }]) - } + // Clear any existing timeout + if (refreshTimeoutRef.current) { + clearTimeout(refreshTimeoutRef.current); + } - const clearConversationDocs = () => { - setConversationDocs([]) - } + // Set a new timeout to debounce multiple rapid refresh calls + refreshTimeoutRef.current = setTimeout(() => { + setRefreshTrigger((prev) => prev + 1); + }, 250); // 250ms debounce + }, []); - const forkFromResponse = (responseId: string) => { - // Start a new conversation with the messages up to the fork point - setCurrentConversationId(null) // Clear current conversation to indicate new conversation - setConversationData(null) // Clear conversation data to prevent reloading - // Set the response ID that we're forking from as the previous response ID - setPreviousResponseIds(prev => ({ + // Cleanup timeout on unmount + useEffect(() => { + return () => { + if (refreshTimeoutRef.current) { + clearTimeout(refreshTimeoutRef.current); + } + }; + }, []); + + // Silent refresh - updates data without loading states + const refreshConversationsSilent = useCallback(async () => { + // Trigger 
silent refresh that updates conversation data without showing loading states + setRefreshTriggerSilent((prev) => prev + 1); + }, []); + + const loadConversation = useCallback((conversation: ConversationData) => { + setCurrentConversationId(conversation.response_id); + setEndpoint(conversation.endpoint); + // Store the full conversation data for the chat page to use + setConversationData(conversation); + // Clear placeholder when loading a real conversation + setPlaceholderConversation(null); + }, []); + + const startNewConversation = useCallback(() => { + // Clear current conversation data and reset state + setCurrentConversationId(null); + setPreviousResponseIds({ chat: null, langflow: null }); + setConversationData(null); + setConversationDocs([]); + + // Create a temporary placeholder conversation to show in sidebar + const placeholderConversation: ConversationData = { + response_id: "new-conversation-" + Date.now(), + title: "New conversation", + endpoint: endpoint, + messages: [ + { + role: "assistant", + content: "How can I assist?", + timestamp: new Date().toISOString(), + }, + ], + created_at: new Date().toISOString(), + last_activity: new Date().toISOString(), + }; + + setPlaceholderConversation(placeholderConversation); + // Force immediate refresh to ensure sidebar shows correct state + refreshConversations(true); + }, [endpoint, refreshConversations]); + + const addConversationDoc = useCallback((filename: string) => { + setConversationDocs((prev) => [ ...prev, - [endpoint]: responseId - })) - // Clear placeholder when forking - setPlaceholderConversation(null) - // The messages are already set by the chat page component before calling this - } + { filename, uploadTime: new Date() }, + ]); + }, []); - const value: ChatContextType = { - endpoint, - setEndpoint, - currentConversationId, - setCurrentConversationId, - previousResponseIds, - setPreviousResponseIds, - refreshConversations, - refreshTrigger, - loadConversation, - startNewConversation, - conversationData, - forkFromResponse, - conversationDocs, - addConversationDoc, - clearConversationDocs, - placeholderConversation, - setPlaceholderConversation, - } + const clearConversationDocs = useCallback(() => { + setConversationDocs([]); + }, []); - return ( - - {children} - - ) + const forkFromResponse = useCallback( + (responseId: string) => { + // Start a new conversation with the messages up to the fork point + setCurrentConversationId(null); // Clear current conversation to indicate new conversation + setConversationData(null); // Clear conversation data to prevent reloading + // Set the response ID that we're forking from as the previous response ID + setPreviousResponseIds((prev) => ({ + ...prev, + [endpoint]: responseId, + })); + // Clear placeholder when forking + setPlaceholderConversation(null); + // The messages are already set by the chat page component before calling this + }, + [endpoint] + ); + + const value = useMemo( + () => ({ + endpoint, + setEndpoint, + currentConversationId, + setCurrentConversationId, + previousResponseIds, + setPreviousResponseIds, + refreshConversations, + refreshConversationsSilent, + refreshTrigger, + refreshTriggerSilent, + loadConversation, + startNewConversation, + conversationData, + forkFromResponse, + conversationDocs, + addConversationDoc, + clearConversationDocs, + placeholderConversation, + setPlaceholderConversation, + }), + [ + endpoint, + currentConversationId, + previousResponseIds, + refreshConversations, + refreshConversationsSilent, + refreshTrigger, + 
refreshTriggerSilent, + loadConversation, + startNewConversation, + conversationData, + forkFromResponse, + conversationDocs, + addConversationDoc, + clearConversationDocs, + placeholderConversation, + ] + ); + + return {children}; } export function useChat(): ChatContextType { - const context = useContext(ChatContext) + const context = useContext(ChatContext); if (context === undefined) { - throw new Error('useChat must be used within a ChatProvider') + throw new Error("useChat must be used within a ChatProvider"); } - return context -} \ No newline at end of file + return context; +} diff --git a/src/services/chat_service.py b/src/services/chat_service.py index 1b811d69..93fddcc8 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -328,7 +328,7 @@ class ChatService: # 2. Get historical conversations from Langflow database # (works with both Google-bound users and direct Langflow users) print(f"[DEBUG] Attempting to fetch Langflow history for user: {user_id}") - langflow_history = await langflow_history_service.get_user_conversation_history(user_id) + langflow_history = await langflow_history_service.get_user_conversation_history(user_id, flow_id=FLOW_ID) if langflow_history.get("conversations"): for conversation in langflow_history["conversations"]: From 3b26a6b60041be1b55ad08413b3f7de6dd541795 Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Thu, 4 Sep 2025 15:56:22 -0300 Subject: [PATCH 11/32] =?UTF-8?q?=F0=9F=94=A5=20(navigation.tsx):=20remove?= =?UTF-8?q?=20unused=20code=20and=20streamline=20navigation=20component=20?= =?UTF-8?q?for=20better=20performance=20and=20readability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/components/navigation.tsx | 230 ------------------------- 1 file changed, 230 deletions(-) delete mode 100644 frontend/src/components/navigation.tsx diff --git a/frontend/src/components/navigation.tsx b/frontend/src/components/navigation.tsx deleted file mode 100644 index 30ac9a4b..00000000 --- a/frontend/src/components/navigation.tsx +++ /dev/null @@ -1,230 +0,0 @@ -"use client" - -import { useState, useEffect, useRef } from "react" -import { useRouter, usePathname } from "next/navigation" -import { Button } from "@/components/ui/button" -import { Plus, MessageSquare, Database, Settings, GitBranch } from "lucide-react" -import { useChat } from "@/contexts/chat-context" -import { useAuth } from "@/contexts/auth-context" - -interface Conversation { - id: string - title: string - endpoint: string - last_activity: string - created_at: string - response_id: string - messages?: Array<{ - role: string - content: string - timestamp?: string - response_id?: string - }> -} - -export function Navigation() { - const router = useRouter() - const pathname = usePathname() - const { user } = useAuth() - const { - refreshTrigger, - refreshTriggerSilent, - loadConversation, - startNewConversation, - currentConversationId, - placeholderConversation, - } = useChat() - - const [conversations, setConversations] = useState([]) - const [loading, setLoading] = useState(false) - - // Load conversations from backend - const loadConversations = async () => { - if (!user) return - - try { - setLoading(true) - const response = await fetch("/api/conversations") - if (response.ok) { - const data = await response.json() - setConversations(data.conversations || []) - } - } catch (error) { - console.error("Failed to load conversations:", error) - } finally { - setLoading(false) - } - } - - // Load conversations 
on mount and when refreshTrigger changes (with loading state) - useEffect(() => { - loadConversations() - }, [refreshTrigger, user]) - - // Silent refresh - update data without loading state - useEffect(() => { - const loadSilent = async () => { - if (!user) return - - try { - // Don't show loading state for silent refresh - const response = await fetch("/api/conversations") - if (response.ok) { - const data = await response.json() - setConversations(data.conversations || []) - } - } catch (error) { - console.error("Silent conversation refresh failed:", error) - } - } - - // Only do silent refresh if we have a silent trigger change (not initial load) - if (refreshTriggerSilent > 0) { - loadSilent() - } - }, [refreshTriggerSilent, user]) - - const handleNewConversation = () => { - startNewConversation() - // Dispatch custom event to notify chat page - window.dispatchEvent(new CustomEvent('newConversation')) - router.push('/chat') - } - - const handleConversationClick = async (conversation: Conversation) => { - try { - // Load full conversation data from backend - const response = await fetch(`/api/conversations/${conversation.response_id}`) - if (response.ok) { - const fullConversation = await response.json() - loadConversation(fullConversation) - router.push('/chat') - } - } catch (error) { - console.error("Failed to load conversation:", error) - } - } - - const formatRelativeTime = (timestamp: string) => { - const date = new Date(timestamp) - const now = new Date() - const diffMs = now.getTime() - date.getTime() - const diffHours = Math.floor(diffMs / (1000 * 60 * 60)) - const diffDays = Math.floor(diffHours / 24) - - if (diffDays > 0) { - return `${diffDays}d ago` - } else if (diffHours > 0) { - return `${diffHours}h ago` - } else { - return 'Just now' - } - } - - return ( - - ) -} \ No newline at end of file From c87877bb8023753614c5c1de855b145cb6544e2f Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Thu, 4 Sep 2025 16:26:31 -0300 Subject: [PATCH 12/32] =?UTF-8?q?=E2=9C=A8=20(agent.py):=20Improve=20user?= =?UTF-8?q?=20ID=20validation=20by=20checking=20if=20it=20is=20a=20Google?= =?UTF-8?q?=20numeric=20ID=20or=20a=20Langflow=20UUID=20format=20=E2=99=BB?= =?UTF-8?q?=EF=B8=8F=20(langflow=5Fhistory=5Fservice.py,=20user=5Fbinding?= =?UTF-8?q?=5Fservice.py):=20Refactor=20UUID=20format=20check=20to=20use?= =?UTF-8?q?=20a=20more=20descriptive=20and=20clear=20logic=20based=20on=20?= =?UTF-8?q?user=20ID=20type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/agent.py | 16 ++++------------ src/services/langflow_history_service.py | 8 +++----- src/services/user_binding_service.py | 8 +++----- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/agent.py b/src/agent.py index b8816291..07fd911e 100644 --- a/src/agent.py +++ b/src/agent.py @@ -418,12 +418,8 @@ async def async_langflow_chat( from services.session_ownership_service import session_ownership_service from services.user_binding_service import user_binding_service - # Check if this is a Google user (has binding but not UUID format) - import re - uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$' - is_uuid = bool(re.match(uuid_pattern, user_id.lower().replace('-', ''))) - - if not is_uuid and user_binding_service.has_binding(user_id): + # Check if this is a Google user (Google IDs are numeric, Langflow IDs are UUID) + if user_id.isdigit() and user_binding_service.has_binding(user_id): langflow_user_id = 
user_binding_service.get_langflow_user_id(user_id) if langflow_user_id: session_ownership_service.claim_session(user_id, response_id, langflow_user_id) @@ -511,12 +507,8 @@ async def async_langflow_chat_stream( from services.session_ownership_service import session_ownership_service from services.user_binding_service import user_binding_service - # Check if this is a Google user (has binding but not UUID format) - import re - uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$' - is_uuid = bool(re.match(uuid_pattern, user_id.lower().replace('-', ''))) - - if not is_uuid and user_binding_service.has_binding(user_id): + # Check if this is a Google user (Google IDs are numeric, Langflow IDs are UUID) + if user_id.isdigit() and user_binding_service.has_binding(user_id): langflow_user_id = user_binding_service.get_langflow_user_id(user_id) if langflow_user_id: session_ownership_service.claim_session(user_id, response_id, langflow_user_id) diff --git a/src/services/langflow_history_service.py b/src/services/langflow_history_service.py index e6e49f4d..ad17a238 100644 --- a/src/services/langflow_history_service.py +++ b/src/services/langflow_history_service.py @@ -44,11 +44,9 @@ class LangflowHistoryService: return None def _is_uuid_format(self, user_id: str) -> bool: - """Check if string looks like a UUID (Langflow user ID format)""" - import re - # Basic UUID pattern check (with or without dashes) - uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$' - return bool(re.match(uuid_pattern, user_id.lower().replace('-', ''))) + """Check if string looks like a UUID (Langflow user ID format vs Google numeric ID)""" + # Langflow IDs are UUID v4, Google IDs are purely numeric + return not user_id.isdigit() def _filter_sessions_by_ownership(self, session_ids: List[str], user_id: str, langflow_user_id: str) -> List[str]: """Filter sessions based on user type and ownership""" diff --git a/src/services/user_binding_service.py b/src/services/user_binding_service.py index 4cead0aa..b7bbe905 100644 --- a/src/services/user_binding_service.py +++ b/src/services/user_binding_service.py @@ -237,11 +237,9 @@ class UserBindingService: return self.bindings.copy() def is_langflow_user_id(self, user_id: str) -> bool: - """Check if user_id appears to be a Langflow UUID""" - import re - # Basic UUID pattern check (with or without dashes) - uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$' - return bool(re.match(uuid_pattern, user_id.lower().replace('-', ''))) + """Check if user_id appears to be a Langflow UUID (vs Google numeric ID)""" + # Langflow IDs are UUID v4, Google IDs are purely numeric + return not user_id.isdigit() def get_user_type(self, user_id: str) -> str: """Determine user type: 'google_oauth', 'langflow_direct', or 'unknown'""" From 0ed98cb6e1e355f98172ebc3e765bd9180b8cd22 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Thu, 4 Sep 2025 21:37:30 -0500 Subject: [PATCH 13/32] move to stand alone page --- frontend/components/knowledge-dropdown.tsx | 109 ++++-- frontend/src/app/connectors/page.tsx | 66 +++- frontend/src/app/settings/page.tsx | 61 +--- frontend/src/app/upload/[provider]/page.tsx | 307 +++++++++++++++++ .../components/cloud-connectors-dialog.tsx | 299 ++++++++++++++++ .../components/cloud-connectors-dropdown.tsx | 77 +++++ .../src/components/google-drive-picker.tsx | 22 +- frontend/src/components/onedrive-picker.tsx | 322 ++++++++++++++++++ src/api/connectors.py | 38 +-- 9 files changed, 1201 
insertions(+), 100 deletions(-) create mode 100644 frontend/src/app/upload/[provider]/page.tsx create mode 100644 frontend/src/components/cloud-connectors-dialog.tsx create mode 100644 frontend/src/components/cloud-connectors-dropdown.tsx create mode 100644 frontend/src/components/onedrive-picker.tsx diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx index e73db5e9..75591087 100644 --- a/frontend/components/knowledge-dropdown.tsx +++ b/frontend/components/knowledge-dropdown.tsx @@ -1,7 +1,6 @@ "use client" import { useState, useEffect, useRef } from "react" -import { useRouter } from "next/navigation" import { ChevronDown, Upload, FolderOpen, Cloud, PlugZap, Plus } from "lucide-react" import { Button } from "@/components/ui/button" import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog" @@ -9,6 +8,7 @@ import { Input } from "@/components/ui/input" import { Label } from "@/components/ui/label" import { cn } from "@/lib/utils" import { useTask } from "@/contexts/task-context" +import { useRouter } from "next/navigation" interface KnowledgeDropdownProps { active?: boolean @@ -16,8 +16,8 @@ interface KnowledgeDropdownProps { } export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeDropdownProps) { - const router = useRouter() const { addTask } = useTask() + const router = useRouter() const [isOpen, setIsOpen] = useState(false) const [showFolderDialog, setShowFolderDialog] = useState(false) const [showS3Dialog, setShowS3Dialog] = useState(false) @@ -27,23 +27,76 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD const [folderLoading, setFolderLoading] = useState(false) const [s3Loading, setS3Loading] = useState(false) const [fileUploading, setFileUploading] = useState(false) + const [cloudConnectors, setCloudConnectors] = useState<{[key: string]: {name: string, available: boolean, connected: boolean, hasToken: boolean}}>({}) const fileInputRef = useRef(null) const dropdownRef = useRef(null) - // Check AWS availability on mount + // Check AWS availability and cloud connectors on mount useEffect(() => { - const checkAws = async () => { + const checkAvailability = async () => { try { - const res = await fetch("/api/upload_options") - if (res.ok) { - const data = await res.json() - setAwsEnabled(Boolean(data.aws)) + // Check AWS + const awsRes = await fetch("/api/upload_options") + if (awsRes.ok) { + const awsData = await awsRes.json() + setAwsEnabled(Boolean(awsData.aws)) + } + + // Check cloud connectors + const connectorsRes = await fetch('/api/connectors') + if (connectorsRes.ok) { + const connectorsResult = await connectorsRes.json() + const cloudConnectorTypes = ['google_drive', 'onedrive', 'sharepoint'] + const connectorInfo: {[key: string]: {name: string, available: boolean, connected: boolean, hasToken: boolean}} = {} + + for (const type of cloudConnectorTypes) { + if (connectorsResult.connectors[type]) { + connectorInfo[type] = { + name: connectorsResult.connectors[type].name, + available: connectorsResult.connectors[type].available, + connected: false, + hasToken: false + } + + // Check connection status + try { + const statusRes = await fetch(`/api/connectors/${type}/status`) + if (statusRes.ok) { + const statusData = await statusRes.json() + const connections = statusData.connections || [] + const activeConnection = connections.find((conn: {is_active: boolean, connection_id: string}) => conn.is_active) + const isConnected = 
activeConnection !== undefined + + if (isConnected && activeConnection) { + connectorInfo[type].connected = true + + // Check token availability + try { + const tokenRes = await fetch(`/api/connectors/${type}/token?connection_id=${activeConnection.connection_id}`) + if (tokenRes.ok) { + const tokenData = await tokenRes.json() + if (tokenData.access_token) { + connectorInfo[type].hasToken = true + } + } + } catch { + // Token check failed + } + } + } + } catch { + // Status check failed + } + } + } + + setCloudConnectors(connectorInfo) } } catch (err) { - console.error("Failed to check AWS availability", err) + console.error("Failed to check availability", err) } } - checkAws() + checkAvailability() }, []) // Handle click outside to close dropdown @@ -194,6 +247,25 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD } } + const cloudConnectorItems = Object.entries(cloudConnectors) + .filter(([, info]) => info.available) + .map(([type, info]) => ({ + label: info.name, + icon: PlugZap, + onClick: () => { + setIsOpen(false) + if (info.connected && info.hasToken) { + router.push(`/upload/${type}`) + } else { + router.push('/settings') + } + }, + disabled: !info.connected || !info.hasToken, + tooltip: !info.connected ? `Connect ${info.name} in Settings first` : + !info.hasToken ? `Reconnect ${info.name} - access token required` : + undefined + })) + const menuItems = [ { label: "Add File", @@ -216,14 +288,7 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD setShowS3Dialog(true) } }] : []), - { - label: "Cloud Connectors", - icon: PlugZap, - onClick: () => { - setIsOpen(false) - router.push("/settings") - } - } + ...cloudConnectorItems ] return ( @@ -265,7 +330,12 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD @@ -364,6 +434,7 @@ export function KnowledgeDropdown({ active, variant = 'navigation' }: KnowledgeD
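+      {/* Gating recap for the connector items assembled above: a cloud
+          connector is listed only when the backend reports it as available;
+          it stays disabled until it is both connected and holds a usable
+          access token (the tooltip names the missing step); fully enabled
+          items route to /upload/<type>, everything else to /settings. */}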
+ ) } \ No newline at end of file diff --git a/frontend/src/app/connectors/page.tsx b/frontend/src/app/connectors/page.tsx index 432d5d0d..9bf206d7 100644 --- a/frontend/src/app/connectors/page.tsx +++ b/frontend/src/app/connectors/page.tsx @@ -1,14 +1,60 @@ +"use client" + import React, { useState } from "react"; -import { GoogleDrivePicker, type DriveSelection } from "./GoogleDrivePicker" +import { GoogleDrivePicker } from "@/components/google-drive-picker" -const [driveSelection, setDriveSelection] = useState({ files: [], folders: [] }); +interface GoogleDriveFile { + id: string; + name: string; + mimeType: string; + webViewLink?: string; + iconLink?: string; +} -// in JSX - +export default function ConnectorsPage() { + const [selectedFiles, setSelectedFiles] = useState([]); -// when calling sync: -const body: { file_ids: string[]; folder_ids: string[]; recursive: boolean } = { - file_ids: driveSelection.files, - folder_ids: driveSelection.folders, - recursive: true, -}; + const handleFileSelection = (files: GoogleDriveFile[]) => { + setSelectedFiles(files); + }; + + const handleSync = () => { + const fileIds = selectedFiles.map(file => file.id); + const body = { + file_ids: fileIds, + folder_ids: [], // Add folder handling if needed + recursive: true, + }; + + console.log('Syncing with:', body); + }; + + return ( +
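+      {/* Demo flow: pick files with GoogleDrivePicker via handleFileSelection,
+          then hand their IDs to handleSync; the actual POST to the sync
+          endpoint is wired up in a later patch in this series. */}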
+

Connectors

+ +
+

+ This is a demo page for the Google Drive picker component. + For full connector functionality, visit the Settings page. +

+ + +
+ + {selectedFiles.length > 0 && ( + + )} +
+ ); +} diff --git a/frontend/src/app/settings/page.tsx b/frontend/src/app/settings/page.tsx index a1eea2a7..711f43e3 100644 --- a/frontend/src/app/settings/page.tsx +++ b/frontend/src/app/settings/page.tsx @@ -12,7 +12,6 @@ import { Loader2, PlugZap, RefreshCw } from "lucide-react" import { ProtectedRoute } from "@/components/protected-route" import { useTask } from "@/contexts/task-context" import { useAuth } from "@/contexts/auth-context" -import { GoogleDrivePicker } from "@/components/google-drive-picker" interface GoogleDriveFile { @@ -23,6 +22,17 @@ interface GoogleDriveFile { iconLink?: string } +interface OneDriveFile { + id: string + name: string + mimeType?: string + webUrl?: string + driveItem?: { + file?: { mimeType: string } + folder?: any + } +} + interface Connector { id: string name: string @@ -32,7 +42,7 @@ interface Connector { type: string connectionId?: string access_token?: string - selectedFiles?: GoogleDriveFile[] + selectedFiles?: GoogleDriveFile[] | OneDriveFile[] } interface SyncResult { @@ -63,8 +73,6 @@ function KnowledgeSourcesPage() { const [syncResults, setSyncResults] = useState<{[key: string]: SyncResult | null}>({}) const [maxFiles, setMaxFiles] = useState(10) const [syncAllFiles, setSyncAllFiles] = useState(false) - const [selectedFiles, setSelectedFiles] = useState<{[connectorId: string]: GoogleDriveFile[]}>({}) - const [connectorAccessTokens, setConnectorAccessTokens] = useState<{[connectorId: string]: string}>({}) // Settings state // Note: backend internal Langflow URL is not needed on the frontend @@ -155,23 +163,6 @@ function KnowledgeSourcesPage() { const activeConnection = connections.find((conn: Connection) => conn.is_active) const isConnected = activeConnection !== undefined - // For Google Drive, try to get access token for the picker - if (connectorType === 'google_drive' && activeConnection) { - try { - const tokenResponse = await fetch(`/api/connectors/${connectorType}/token?connection_id=${activeConnection.connection_id}`) - if (tokenResponse.ok) { - const tokenData = await tokenResponse.json() - if (tokenData.access_token) { - setConnectorAccessTokens(prev => ({ - ...prev, - [connectorType]: tokenData.access_token - })) - } - } - } catch (e) { - console.log('Could not fetch access token for Google Drive picker:', e) - } - } setConnectors(prev => prev.map(c => c.type === connectorType @@ -238,19 +229,6 @@ function KnowledgeSourcesPage() { } } - const handleFileSelection = (connectorId: string, files: GoogleDriveFile[]) => { - setSelectedFiles(prev => ({ - ...prev, - [connectorId]: files - })) - - // Update the connector with selected files - setConnectors(prev => prev.map(c => - c.id === connectorId - ? { ...c, selectedFiles: files } - : c - )) - } const handleSync = async (connector: Connector) => { if (!connector.connectionId) return @@ -268,10 +246,7 @@ function KnowledgeSourcesPage() { max_files: syncAllFiles ? 0 : (maxFiles || undefined) } - // Add selected files for Google Drive - if (connector.type === "google_drive" && selectedFiles[connector.id]?.length > 0) { - syncBody.selected_files = selectedFiles[connector.id].map(file => file.id) - } + // Note: File selection is now handled via the cloud connectors dialog const response = await fetch(`/api/connectors/${connector.type}/sync`, { method: 'POST', @@ -488,16 +463,6 @@ function KnowledgeSourcesPage() { {connector.status === "connected" ? (
- {/* Google Drive file picker */} - {connector.type === "google_drive" && ( - handleFileSelection(connector.id, files)} - selectedFiles={selectedFiles[connector.id] || []} - isAuthenticated={connector.status === "connected"} - accessToken={connectorAccessTokens[connector.type]} - /> - )} - +
+ +
+
+ +

Provider Not Available

+

{error}

+ +
+
+
+ ) + } + + if (connector.status !== "connected") { + return ( +
+
+ +
+ +
+
+ +

{connector.name} Not Connected

+

+ You need to connect your {connector.name} account before you can select files. +

+ +
+
+
+ ) + } + + if (!connector.hasAccessToken) { + return ( +
+
+ +
+ +
+
+ +

Access Token Required

+

+ {connector.accessTokenError || `Unable to get access token for ${connector.name}. Try reconnecting your account.`} +

+ +
+
+
+ ) + } + + return ( +
+
+ + +
+

Select Files from {connector.name}

+

+ Choose specific files from your {connector.name} account to add to your knowledge base. +

+
+
+ +
+ {connector.type === "google_drive" && ( + + )} + + {(connector.type === "onedrive" || connector.type === "sharepoint") && ( + + )} +
+ + {selectedFiles.length > 0 && ( +
+ + +
+ )} +
+ ) +} \ No newline at end of file diff --git a/frontend/src/components/cloud-connectors-dialog.tsx b/frontend/src/components/cloud-connectors-dialog.tsx new file mode 100644 index 00000000..a9fefbd1 --- /dev/null +++ b/frontend/src/components/cloud-connectors-dialog.tsx @@ -0,0 +1,299 @@ +"use client" + +import { useState, useEffect, useCallback } from "react" +import { Button } from "@/components/ui/button" +import { Badge } from "@/components/ui/badge" +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog" +import { GoogleDrivePicker } from "@/components/google-drive-picker" +import { OneDrivePicker } from "@/components/onedrive-picker" +import { Loader2 } from "lucide-react" + +interface GoogleDriveFile { + id: string + name: string + mimeType: string + webViewLink?: string + iconLink?: string +} + +interface OneDriveFile { + id: string + name: string + mimeType?: string + webUrl?: string + driveItem?: { + file?: { mimeType: string } + folder?: any + } +} + +interface CloudConnector { + id: string + name: string + description: string + icon: React.ReactNode + status: "not_connected" | "connecting" | "connected" | "error" + type: string + connectionId?: string + hasAccessToken: boolean + accessTokenError?: string +} + +interface CloudConnectorsDialogProps { + isOpen: boolean + onOpenChange: (open: boolean) => void + onFileSelected?: (files: GoogleDriveFile[] | OneDriveFile[], connectorType: string) => void +} + +export function CloudConnectorsDialog({ + isOpen, + onOpenChange, + onFileSelected +}: CloudConnectorsDialogProps) { + const [connectors, setConnectors] = useState([]) + const [isLoading, setIsLoading] = useState(true) + const [selectedFiles, setSelectedFiles] = useState<{[connectorId: string]: GoogleDriveFile[] | OneDriveFile[]}>({}) + const [connectorAccessTokens, setConnectorAccessTokens] = useState<{[connectorType: string]: string}>({}) + const [activePickerType, setActivePickerType] = useState(null) + const [isGooglePickerOpen, setIsGooglePickerOpen] = useState(false) + + const getConnectorIcon = (iconName: string) => { + const iconMap: { [key: string]: React.ReactElement } = { + 'google-drive': ( +
+ G +
+ ), + 'sharepoint': ( +
+ SP +
+ ), + 'onedrive': ( +
+ OD +
+ ), + } + return iconMap[iconName] || ( +
+ ? +
+ ) + } + + const fetchConnectorStatuses = useCallback(async () => { + if (!isOpen) return + + setIsLoading(true) + try { + // Fetch available connectors from backend + const connectorsResponse = await fetch('/api/connectors') + if (!connectorsResponse.ok) { + throw new Error('Failed to load connectors') + } + + const connectorsResult = await connectorsResponse.json() + const connectorTypes = Object.keys(connectorsResult.connectors) + + // Filter to only cloud connectors + const cloudConnectorTypes = connectorTypes.filter(type => + ['google_drive', 'onedrive', 'sharepoint'].includes(type) && + connectorsResult.connectors[type].available + ) + + // Initialize connectors list + const initialConnectors = cloudConnectorTypes.map(type => ({ + id: type, + name: connectorsResult.connectors[type].name, + description: connectorsResult.connectors[type].description, + icon: getConnectorIcon(connectorsResult.connectors[type].icon), + status: "not_connected" as const, + type: type, + hasAccessToken: false, + accessTokenError: undefined + })) + + setConnectors(initialConnectors) + + // Check status for each cloud connector type + for (const connectorType of cloudConnectorTypes) { + try { + const response = await fetch(`/api/connectors/${connectorType}/status`) + if (response.ok) { + const data = await response.json() + const connections = data.connections || [] + const activeConnection = connections.find((conn: any) => conn.is_active) + const isConnected = activeConnection !== undefined + + let hasAccessToken = false + let accessTokenError: string | undefined = undefined + + // Try to get access token for connected connectors + if (isConnected && activeConnection) { + try { + const tokenResponse = await fetch(`/api/connectors/${connectorType}/token?connection_id=${activeConnection.connection_id}`) + if (tokenResponse.ok) { + const tokenData = await tokenResponse.json() + if (tokenData.access_token) { + hasAccessToken = true + setConnectorAccessTokens(prev => ({ + ...prev, + [connectorType]: tokenData.access_token + })) + } + } else { + const errorData = await tokenResponse.json().catch(() => ({ error: 'Token unavailable' })) + accessTokenError = errorData.error || 'Access token unavailable' + } + } catch (e) { + accessTokenError = 'Failed to fetch access token' + } + } + + setConnectors(prev => prev.map(c => + c.type === connectorType + ? { + ...c, + status: isConnected ? "connected" : "not_connected", + connectionId: activeConnection?.connection_id, + hasAccessToken, + accessTokenError + } + : c + )) + } + } catch (error) { + console.error(`Failed to check status for ${connectorType}:`, error) + } + } + } catch (error) { + console.error('Failed to load cloud connectors:', error) + } finally { + setIsLoading(false) + } + }, [isOpen]) + + const handleFileSelection = (connectorId: string, files: GoogleDriveFile[] | OneDriveFile[]) => { + setSelectedFiles(prev => ({ + ...prev, + [connectorId]: files + })) + + onFileSelected?.(files, connectorId) + } + + useEffect(() => { + fetchConnectorStatuses() + }, [fetchConnectorStatuses]) + + + return ( + + + + Cloud File Connectors + + Select files from your connected cloud storage providers + + + +
+ {isLoading ? ( +
+ + Loading connectors... +
+ ) : connectors.length === 0 ? ( +
+ No cloud connectors available. Configure them in Settings first. +
+ ) : ( +
+ {/* Service Buttons Row */} +
+ {connectors + .filter(connector => connector.status === "connected") + .map((connector) => ( + + ))} +
+ + {connectors.every(c => c.status !== "connected") && ( +
+

No connected cloud providers found.

+

Go to Settings to connect your cloud storage accounts.

+
+ )} + + {/* Render pickers inside dialog */} + {activePickerType && connectors.find(c => c.id === activePickerType) && (() => { + const connector = connectors.find(c => c.id === activePickerType)! + + if (connector.type === "google_drive") { + return ( +
+ { + handleFileSelection(connector.id, files) + setActivePickerType(null) + setIsGooglePickerOpen(false) + }} + selectedFiles={selectedFiles[connector.id] as GoogleDriveFile[] || []} + isAuthenticated={connector.status === "connected"} + accessToken={connectorAccessTokens[connector.type]} + onPickerStateChange={setIsGooglePickerOpen} + /> +
+ ) + } + + if (connector.type === "onedrive" || connector.type === "sharepoint") { + return ( +
+ { + handleFileSelection(connector.id, files) + setActivePickerType(null) + }} + selectedFiles={selectedFiles[connector.id] as OneDriveFile[] || []} + isAuthenticated={connector.status === "connected"} + accessToken={connectorAccessTokens[connector.type]} + connectorType={connector.type as "onedrive" | "sharepoint"} + /> +
+ ) + } + + return null + })()} +
+ )} +
+
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/components/cloud-connectors-dropdown.tsx b/frontend/src/components/cloud-connectors-dropdown.tsx new file mode 100644 index 00000000..1989132a --- /dev/null +++ b/frontend/src/components/cloud-connectors-dropdown.tsx @@ -0,0 +1,77 @@ +"use client" + +import { useState } from "react" +import { Button } from "@/components/ui/button" +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu" +import { CloudConnectorsDialog } from "@/components/cloud-connectors-dialog" +import { Cloud, ChevronDown } from "lucide-react" + +interface GoogleDriveFile { + id: string + name: string + mimeType: string + webViewLink?: string + iconLink?: string +} + +interface OneDriveFile { + id: string + name: string + mimeType?: string + webUrl?: string + driveItem?: { + file?: { mimeType: string } + folder?: any + } +} + +interface CloudConnectorsDropdownProps { + onFileSelected?: (files: GoogleDriveFile[] | OneDriveFile[], connectorType: string) => void + buttonText?: string + variant?: "default" | "outline" | "secondary" | "ghost" | "link" | "destructive" + size?: "default" | "sm" | "lg" | "icon" +} + +export function CloudConnectorsDropdown({ + onFileSelected, + buttonText = "Cloud Files", + variant = "outline", + size = "default" +}: CloudConnectorsDropdownProps) { + const [isDialogOpen, setIsDialogOpen] = useState(false) + + const handleOpenDialog = () => { + setIsDialogOpen(true) + } + + return ( + <> + + + + + + + + Select Cloud Files + + + + + + + ) +} \ No newline at end of file diff --git a/frontend/src/components/google-drive-picker.tsx b/frontend/src/components/google-drive-picker.tsx index e3a9555b..f173f05f 100644 --- a/frontend/src/components/google-drive-picker.tsx +++ b/frontend/src/components/google-drive-picker.tsx @@ -10,6 +10,7 @@ interface GoogleDrivePickerProps { selectedFiles?: GoogleDriveFile[] isAuthenticated: boolean accessToken?: string + onPickerStateChange?: (isOpen: boolean) => void } interface GoogleDriveFile { @@ -88,7 +89,8 @@ export function GoogleDrivePicker({ onFileSelected, selectedFiles = [], isAuthenticated, - accessToken + accessToken, + onPickerStateChange }: GoogleDrivePickerProps) { const [isPickerLoaded, setIsPickerLoaded] = useState(false) const [isPickerOpen, setIsPickerOpen] = useState(false) @@ -131,6 +133,7 @@ export function GoogleDrivePicker({ } }, []) + const openPicker = () => { if (!isPickerLoaded || !accessToken || !window.google?.picker) { return @@ -138,7 +141,9 @@ export function GoogleDrivePicker({ try { setIsPickerOpen(true) + onPickerStateChange?.(true) + // Create picker with higher z-index and focus handling const picker = new window.google.picker.PickerBuilder() .addView(window.google.picker.ViewId.DOCS) .addView(window.google.picker.ViewId.FOLDERS) @@ -149,9 +154,23 @@ export function GoogleDrivePicker({ .build() picker.setVisible(true) + + // Apply z-index fix after a short delay to ensure picker is rendered + setTimeout(() => { + const pickerElements = document.querySelectorAll('.picker-dialog, .goog-modalpopup') + pickerElements.forEach(el => { + (el as HTMLElement).style.zIndex = '10000' + }) + const bgElements = document.querySelectorAll('.picker-dialog-bg, .goog-modalpopup-bg') + bgElements.forEach(el => { + (el as HTMLElement).style.zIndex = '9999' + }) + }, 100) + } catch (error) { console.error('Error creating picker:', error) setIsPickerOpen(false) + onPickerStateChange?.(false) } } @@ -169,6 +188,7 @@ 
export function GoogleDrivePicker({ } setIsPickerOpen(false) + onPickerStateChange?.(false) } const removeFile = (fileId: string) => { diff --git a/frontend/src/components/onedrive-picker.tsx b/frontend/src/components/onedrive-picker.tsx new file mode 100644 index 00000000..b40650a7 --- /dev/null +++ b/frontend/src/components/onedrive-picker.tsx @@ -0,0 +1,322 @@ +"use client" + +import { useState, useEffect } from "react" +import { Button } from "@/components/ui/button" +import { Badge } from "@/components/ui/badge" +import { FileText, Folder, X } from "lucide-react" + +interface OneDrivePickerProps { + onFileSelected: (files: OneDriveFile[]) => void + selectedFiles?: OneDriveFile[] + isAuthenticated: boolean + accessToken?: string + connectorType?: "onedrive" | "sharepoint" + onPickerStateChange?: (isOpen: boolean) => void +} + +interface OneDriveFile { + id: string + name: string + mimeType?: string + webUrl?: string + driveItem?: { + file?: { mimeType: string } + folder?: any + } +} + +interface GraphResponse { + value: OneDriveFile[] +} + +declare global { + interface Window { + mgt?: { + Providers: { + globalProvider: any + } + } + } +} + +export function OneDrivePicker({ + onFileSelected, + selectedFiles = [], + isAuthenticated, + accessToken, + connectorType = "onedrive", + onPickerStateChange +}: OneDrivePickerProps) { + const [isLoading, setIsLoading] = useState(false) + const [files, setFiles] = useState([]) + const [isPickerOpen, setIsPickerOpen] = useState(false) + const [currentPath, setCurrentPath] = useState( + connectorType === "sharepoint" ? 'sites?search=' : 'me/drive/root/children' + ) + const [breadcrumbs, setBreadcrumbs] = useState<{id: string, name: string}[]>([ + {id: 'root', name: connectorType === "sharepoint" ? 'SharePoint' : 'OneDrive'} + ]) + + useEffect(() => { + const loadMGT = async () => { + if (typeof window !== 'undefined' && !window.mgt) { + try { + const mgtModule = await import('@microsoft/mgt-components') + const mgtProvider = await import('@microsoft/mgt-msal2-provider') + + // Initialize provider if needed + if (!window.mgt?.Providers?.globalProvider && accessToken) { + // For simplicity, we'll use direct Graph API calls instead of MGT components + } + } catch (error) { + console.warn('MGT not available, falling back to direct API calls') + } + } + } + + loadMGT() + }, [accessToken]) + + + const fetchFiles = async (path: string = currentPath) => { + if (!accessToken) return + + setIsLoading(true) + try { + const response = await fetch(`https://graph.microsoft.com/v1.0/${path}`, { + headers: { + 'Authorization': `Bearer ${accessToken}`, + 'Content-Type': 'application/json' + } + }) + + if (response.ok) { + const data: GraphResponse = await response.json() + setFiles(data.value || []) + } else { + console.error('Failed to fetch OneDrive files:', response.statusText) + } + } catch (error) { + console.error('Error fetching OneDrive files:', error) + } finally { + setIsLoading(false) + } + } + + const openPicker = () => { + if (!accessToken) return + + setIsPickerOpen(true) + onPickerStateChange?.(true) + fetchFiles() + } + + const closePicker = () => { + setIsPickerOpen(false) + onPickerStateChange?.(false) + setFiles([]) + setCurrentPath( + connectorType === "sharepoint" ? 'sites?search=' : 'me/drive/root/children' + ) + setBreadcrumbs([ + {id: 'root', name: connectorType === "sharepoint" ? 
'SharePoint' : 'OneDrive'} + ]) + } + + const handleFileClick = (file: OneDriveFile) => { + if (file.driveItem?.folder) { + // Navigate to folder + const newPath = `me/drive/items/${file.id}/children` + setCurrentPath(newPath) + setBreadcrumbs([...breadcrumbs, {id: file.id, name: file.name}]) + fetchFiles(newPath) + } else { + // Select file + const isAlreadySelected = selectedFiles.some(f => f.id === file.id) + if (!isAlreadySelected) { + onFileSelected([...selectedFiles, file]) + } + } + } + + const navigateToBreadcrumb = (index: number) => { + if (index === 0) { + setCurrentPath('me/drive/root/children') + setBreadcrumbs([{id: 'root', name: 'OneDrive'}]) + fetchFiles('me/drive/root/children') + } else { + const targetCrumb = breadcrumbs[index] + const newPath = `me/drive/items/${targetCrumb.id}/children` + setCurrentPath(newPath) + setBreadcrumbs(breadcrumbs.slice(0, index + 1)) + fetchFiles(newPath) + } + } + + const removeFile = (fileId: string) => { + const updatedFiles = selectedFiles.filter(file => file.id !== fileId) + onFileSelected(updatedFiles) + } + + const getFileIcon = (file: OneDriveFile) => { + if (file.driveItem?.folder) { + return + } + return + } + + const getMimeTypeLabel = (file: OneDriveFile) => { + const mimeType = file.driveItem?.file?.mimeType || file.mimeType || '' + const typeMap: { [key: string]: string } = { + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Word Doc', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Excel', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'PowerPoint', + 'application/pdf': 'PDF', + 'text/plain': 'Text', + 'image/jpeg': 'Image', + 'image/png': 'Image', + } + + if (file.driveItem?.folder) return 'Folder' + return typeMap[mimeType] || 'Document' + } + + const serviceName = connectorType === "sharepoint" ? "SharePoint" : "OneDrive" + + if (!isAuthenticated) { + return ( +
+ Please connect to {serviceName} first to select specific files. +
+ ) + } + + return ( +
+
+
+

{serviceName} File Selection

+

+ Choose specific files to sync instead of syncing everything +

+
+ +
+ + {/* Status message when access token is missing */} + {isAuthenticated && !accessToken && ( +
+
Access token unavailable
+
The file picker requires an access token. Try disconnecting and reconnecting your {serviceName} account.
+
+ )} + + {/* File Picker Modal */} + {isPickerOpen && ( +
+
+
+

Select Files from {serviceName}

+ +
+ + {/* Breadcrumbs */} +
+ {breadcrumbs.map((crumb, index) => ( +
+ {index > 0 && /} + +
+ ))} +
+ + {/* File List */} +
+ {isLoading ? ( +
Loading...
+ ) : files.length === 0 ? ( +
No files found
+ ) : ( +
+ {files.map((file) => ( +
handleFileClick(file)} + > +
+ {getFileIcon(file)} + {file.name} + + {getMimeTypeLabel(file)} + +
+ {selectedFiles.some(f => f.id === file.id) && ( + Selected + )} +
+ ))} +
+ )} +
+
+
+ )} + + {selectedFiles.length > 0 && ( +
+

+ Selected files ({selectedFiles.length}): +

+
+ {selectedFiles.map((file) => ( +
+
+ {getFileIcon(file)} + {file.name} + + {getMimeTypeLabel(file)} + +
+ +
+ ))} +
+ +
+ )} +
+ ) +} \ No newline at end of file diff --git a/src/api/connectors.py b/src/api/connectors.py index 426c32ed..d9f76196 100644 --- a/src/api/connectors.py +++ b/src/api/connectors.py @@ -24,11 +24,8 @@ async def connector_sync(request: Request, connector_service, session_manager): max_files = data.get("max_files") try: - logger.debug("Starting connector sync", connector_type=connector_type, max_files=max_files) - user = request.state.user jwt_token = request.state.jwt_token - logger.debug("User authenticated", user_id=user.user_id) # Get all active connections for this connector type and user connections = await connector_service.connection_manager.list_connections( @@ -45,18 +42,15 @@ async def connector_sync(request: Request, connector_service, session_manager): # Start sync tasks for all active connections task_ids = [] for connection in active_connections: - logger.debug("About to call sync_connector_files for connection", connection_id=connection.connection_id) task_id = await connector_service.sync_connector_files( connection.connection_id, user.user_id, max_files, jwt_token=jwt_token, - # NEW: thread picker selections through selected_files=data.get("selected_files"), selected_folders=data.get("selected_folders"), ) task_ids.append(task_id) - logger.debug("Got task ID", task_id=task_id) return JSONResponse( { @@ -69,14 +63,7 @@ async def connector_sync(request: Request, connector_service, session_manager): ) except Exception as e: - import sys - import traceback - - error_msg = f"[ERROR] Connector sync failed: {str(e)}" - logger.error(error_msg) - traceback.print_exc(file=sys.stderr) - sys.stderr.flush() - + logger.error("Connector sync failed", error=str(e)) return JSONResponse({"error": f"Sync failed: {str(e)}"}, status_code=500) @@ -247,9 +234,6 @@ async def connector_webhook(request: Request, connector_service, session_manager except Exception as e: logger.error("Failed to process webhook for connection", connection_id=connection.connection_id, error=str(e)) - import traceback - - traceback.print_exc() return JSONResponse( { "status": "error", @@ -261,10 +245,7 @@ async def connector_webhook(request: Request, connector_service, session_manager ) except Exception as e: - import traceback - logger.error("Webhook processing failed", error=str(e)) - traceback.print_exc() return JSONResponse( {"error": f"Webhook processing failed: {str(e)}"}, status_code=500 ) @@ -288,7 +269,7 @@ async def connector_token(request: Request, connector_service, session_manager): # Get the connector instance connector = await connector_service._get_connector(connection_id) if not connector: - return JSONResponse({"error": "Connector not available"}, status_code=404) + return JSONResponse({"error": f"Connector not available - authentication may have failed for {connector_type}"}, status_code=404) # For Google Drive, get the access token if connector_type == "google_drive" and hasattr(connector, 'oauth'): @@ -301,9 +282,22 @@ async def connector_token(request: Request, connector_service, session_manager): }) else: return JSONResponse({"error": "Invalid or expired credentials"}, status_code=401) + + # For OneDrive and SharePoint, get the access token + elif connector_type in ["onedrive", "sharepoint"] and hasattr(connector, 'oauth'): + try: + access_token = connector.oauth.get_access_token() + return JSONResponse({ + "access_token": access_token, + "expires_in": None # MSAL handles token expiry internally + }) + except ValueError as e: + return JSONResponse({"error": f"Failed to get access token: {str(e)}"}, 
status_code=401) + except Exception as e: + return JSONResponse({"error": f"Authentication error: {str(e)}"}, status_code=500) return JSONResponse({"error": "Token not available for this connector type"}, status_code=400) except Exception as e: - print(f"Error getting connector token: {e}") + logger.error("Error getting connector token", error=str(e)) return JSONResponse({"error": str(e)}, status_code=500) From 54ace16803e73467c1b013f60357ddec1ff35d55 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 5 Sep 2025 12:09:45 -0400 Subject: [PATCH 14/32] fix warmup logger --- warm_up_docling.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/warm_up_docling.py b/warm_up_docling.py index 7e865ae4..272768ce 100644 --- a/warm_up_docling.py +++ b/warm_up_docling.py @@ -1,16 +1,17 @@ from docling.document_converter import DocumentConverter -from src.utils.logging_config import get_logger +import logging -logger = get_logger(__name__) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) logger.info("Warming up docling models") try: # Use the sample document to warm up docling test_file = "/app/warmup_ocr.pdf" - logger.info("Using test file to warm up docling", test_file=test_file) + logger.info(f"Using test file to warm up docling: {test_file}") DocumentConverter().convert(test_file) logger.info("Docling models warmed up successfully") except Exception as e: - logger.info("Docling warm-up completed with exception", error=str(e)) + logger.info(f"Docling warm-up completed with exception: {str(e)}") # This is expected - we just want to trigger the model downloads From 796fd28b8fe13293483d44b12eb979f788ae3ed6 Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Fri, 5 Sep 2025 12:14:33 -0400 Subject: [PATCH 15/32] Enhance task management to support shared tasks for anonymous users. Updated `get_task_status` and `get_all_tasks` methods to include fallback to tasks stored under the "anonymous" user key. Improved task cancellation logic to handle shared tasks. Refactored code for clarity and maintainability. --- src/services/task_service.py | 81 ++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 21 deletions(-) diff --git a/src/services/task_service.py b/src/services/task_service.py index bd2a73d7..257ae2ce 100644 --- a/src/services/task_service.py +++ b/src/services/task_service.py @@ -2,9 +2,10 @@ import asyncio import uuid import time import random -from typing import Dict +from typing import Dict, Optional from models.tasks import TaskStatus, UploadTask, FileTask +from session_manager import AnonymousUser from src.utils.gpu_detection import get_worker_count from utils.logging_config import get_logger @@ -179,16 +180,29 @@ class TaskService: self.task_store[user_id][task_id].status = TaskStatus.FAILED self.task_store[user_id][task_id].updated_at = time.time() - def get_task_status(self, user_id: str, task_id: str) -> dict: - """Get the status of a specific upload task""" - if ( - not task_id - or user_id not in self.task_store - or task_id not in self.task_store[user_id] - ): + def get_task_status(self, user_id: str, task_id: str) -> Optional[dict]: + """Get the status of a specific upload task + + Includes fallback to shared tasks stored under the "anonymous" user key + so default system tasks are visible to all users. 
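+
+        Illustrative lookup order (store contents here are hypothetical):
+
+            task_store = {
+                "113765...": {"task-a": ...},  # caller's own tasks
+                "anonymous": {"task-b": ...},  # shared default tasks
+            }
+            get_task_status("113765...", "task-b")  # falls back to "anonymous"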
+        """
+        if not task_id:
+            return None

-        upload_task = self.task_store[user_id][task_id]
+        # Prefer the caller's user_id; otherwise check shared/anonymous tasks
+        candidate_user_ids = [user_id, AnonymousUser().user_id]
+
+        upload_task = None
+        for candidate_user_id in candidate_user_ids:
+            if (
+                candidate_user_id in self.task_store
+                and task_id in self.task_store[candidate_user_id]
+            ):
+                upload_task = self.task_store[candidate_user_id][task_id]
+                break
+
+        if upload_task is None:
+            return None

         file_statuses = {}
         for file_path, file_task in upload_task.file_tasks.items():
@@ -214,14 +228,21 @@ class TaskService:
         }

     def get_all_tasks(self, user_id: str) -> list:
-        """Get all tasks for a user"""
-        if user_id not in self.task_store:
-            return []
+        """Get all tasks for a user

-        tasks = []
-        for task_id, upload_task in self.task_store[user_id].items():
-            tasks.append(
-                {
+        Returns the union of the user's own tasks and shared default tasks
+        stored under the "anonymous" user key. User-owned tasks take
+        precedence if a task_id overlaps.
+        """
+        tasks_by_id = {}
+
+        def add_tasks_from_store(store_user_id):
+            if store_user_id not in self.task_store:
+                return
+            for task_id, upload_task in self.task_store[store_user_id].items():
+                if task_id in tasks_by_id:
+                    continue
+                tasks_by_id[task_id] = {
                     "task_id": upload_task.task_id,
                     "status": upload_task.status.value,
                     "total_files": upload_task.total_files,
@@ -231,18 +252,36 @@ class TaskService:
                     "created_at": upload_task.created_at,
                     "updated_at": upload_task.updated_at,
                 }
-            )

-        # Sort by creation time, most recent first
+        # Add user-owned tasks first, then shared anonymous tasks
+        add_tasks_from_store(user_id)
+        add_tasks_from_store(AnonymousUser().user_id)
+
+        tasks = list(tasks_by_id.values())
         tasks.sort(key=lambda x: x["created_at"], reverse=True)
         return tasks

     def cancel_task(self, user_id: str, task_id: str) -> bool:
-        """Cancel a task if it exists and is not already completed"""
-        if user_id not in self.task_store or task_id not in self.task_store[user_id]:
+        """Cancel a task if it exists and is not already completed.
+
+        Supports cancellation of shared default tasks stored under the anonymous user.
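+
+        Only pending or running tasks can be cancelled; the status check below
+        treats COMPLETED and FAILED as terminal.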
+ """ + # Try user's own tasks first, then shared anonymous tasks + candidate_user_ids = [user_id, AnonymousUser().user_id] + + store_user_id = None + for candidate_user_id in candidate_user_ids: + if ( + candidate_user_id in self.task_store + and task_id in self.task_store[candidate_user_id] + ): + store_user_id = candidate_user_id + break + + if store_user_id is None: return False - upload_task = self.task_store[user_id][task_id] + upload_task = self.task_store[store_user_id][task_id] # Can only cancel pending or running tasks if upload_task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED]: From d8ec7584dae91e05f76cc4ea04540ad7aee8f0a0 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Fri, 5 Sep 2025 09:21:16 -0700 Subject: [PATCH 16/32] Fix the regular sync functionality --- src/api/connectors.py | 5 -- src/connectors/base.py | 2 +- src/connectors/google_drive/connector.py | 2 +- src/connectors/service.py | 58 +++++++++++++++++++----- 4 files changed, 49 insertions(+), 18 deletions(-) diff --git a/src/api/connectors.py b/src/api/connectors.py index f35e9f5a..0b767b65 100644 --- a/src/api/connectors.py +++ b/src/api/connectors.py @@ -23,9 +23,6 @@ async def connector_sync(request: Request, connector_service, session_manager): data = await request.json() max_files = data.get("max_files") - if not data.get("selected_files"): - return JSONResponse({"error": "selected_files is required"}, status_code=400) - try: user = request.state.user jwt_token = request.state.jwt_token @@ -50,8 +47,6 @@ async def connector_sync(request: Request, connector_service, session_manager): user.user_id, max_files, jwt_token=jwt_token, - selected_files=data.get("selected_files"), - selected_folders=data.get("selected_folders"), ) task_ids.append(task_id) diff --git a/src/connectors/base.py b/src/connectors/base.py index d16fe4cf..35c43555 100644 --- a/src/connectors/base.py +++ b/src/connectors/base.py @@ -108,7 +108,7 @@ class BaseConnector(ABC): pass @abstractmethod - async def list_files(self, page_token: Optional[str] = None) -> Dict[str, Any]: + async def list_files(self, page_token: Optional[str] = None, max_files: Optional[int] = None) -> Dict[str, Any]: """List all files. 
Returns files and next_page_token if any.""" pass diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index 07145fcd..887ffeca 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -452,6 +452,7 @@ class GoogleDriveConnector(BaseConnector): async def list_files( self, page_token: Optional[str] = None, + max_files: Optional[int] = None, **kwargs ) -> Dict[str, Any]: """ @@ -466,7 +467,6 @@ class GoogleDriveConnector(BaseConnector): items = self._iter_selected_items() # Optionally honor a request-scoped max_files (e.g., from your API payload) - max_files = kwargs.get("max_files") if isinstance(max_files, int) and max_files > 0: items = items[:max_files] diff --git a/src/connectors/service.py b/src/connectors/service.py index a760d976..e3de9e10 100644 --- a/src/connectors/service.py +++ b/src/connectors/service.py @@ -1,14 +1,13 @@ -import asyncio import tempfile import os from typing import Dict, Any, List, Optional from .base import BaseConnector, ConnectorDocument -from .google_drive import GoogleDriveConnector -from .sharepoint import SharePointConnector -from .onedrive import OneDriveConnector +from utils.logging_config import get_logger from .connection_manager import ConnectionManager +logger = get_logger(__name__) + class ConnectorService: """Service to manage document connectors and process files""" @@ -194,8 +193,6 @@ class ConnectorService: user_id: str, max_files: int = None, jwt_token: str = None, - selected_files: List[str] = None, - selected_folders: List[str] = None, ) -> str: """Sync files from a connector connection using existing task tracking system""" if not self.task_service: @@ -203,8 +200,10 @@ class ConnectorService: "TaskService not available - connector sync requires task service dependency" ) - print( - f"[DEBUG] Starting sync for connection {connection_id}, max_files={max_files}" + logger.debug( + "Starting sync for connection", + connection_id=connection_id, + max_files=max_files, ) connector = await self.get_connector(connection_id) @@ -213,11 +212,45 @@ class ConnectorService: f"Connection '{connection_id}' not found or not authenticated" ) - print(f"[DEBUG] Got connector, authenticated: {connector.is_authenticated}") + logger.debug("Got connector", authenticated=connector.is_authenticated) if not connector.is_authenticated: raise ValueError(f"Connection '{connection_id}' not authenticated") + # Collect files to process (limited by max_files) + files_to_process = [] + page_token = None + + # Calculate page size to minimize API calls + page_size = min(max_files or 100, 1000) if max_files else 100 + + while True: + # List files from connector with limit + logger.info( + "Calling list_files", page_size=page_size, page_token=page_token + ) + file_list = await connector.list_files(page_token, max_files=page_size) + logger.info( + "Got files from connector", file_count=len(file_list.get("files", [])) + ) + files = file_list["files"] + + if not files: + break + + for file_info in files: + if max_files and len(files_to_process) >= max_files: + break + files_to_process.append(file_info) + + # Stop if we have enough files or no more pages + if (max_files and len(files_to_process) >= max_files) or not file_list.get( + "nextPageToken" + ): + break + + page_token = file_list.get("nextPageToken") + # Get user information user = self.session_manager.get_user(user_id) if self.session_manager else None owner_name = user.name if user else None @@ -229,16 +262,19 @@ class 
ConnectorService: processor = ConnectorFileProcessor( self, connection_id, - selected_files or [], + files_to_process, user_id, jwt_token=jwt_token, owner_name=owner_name, owner_email=owner_email, ) + # Use file IDs as items (no more fake file paths!) + file_ids = [file_info["id"] for file_info in files_to_process] + # Create custom task using TaskService task_id = await self.task_service.create_custom_task( - user_id, selected_files, processor + user_id, file_ids, processor ) return task_id From 947c7f8f3b00f1b9232099cc6920720589228028 Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Fri, 5 Sep 2025 12:24:28 -0400 Subject: [PATCH 17/32] shared tasks --- src/services/task_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/task_service.py b/src/services/task_service.py index 257ae2ce..695752d8 100644 --- a/src/services/task_service.py +++ b/src/services/task_service.py @@ -266,7 +266,7 @@ class TaskService: Supports cancellation of shared default tasks stored under the anonymous user. """ - # Try user's own tasks first, then shared anonymous tasks + # Check candidate user IDs first, then anonymous to find which user ID the task is mapped to candidate_user_ids = [user_id, AnonymousUser().user_id] store_user_id = None From 18f41523c16952af80a20a1bdd08f45ce313deb7 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Fri, 5 Sep 2025 09:50:42 -0700 Subject: [PATCH 18/32] allow selection of selected files --- src/api/connectors.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/api/connectors.py b/src/api/connectors.py index 0b767b65..2c35d135 100644 --- a/src/api/connectors.py +++ b/src/api/connectors.py @@ -22,6 +22,7 @@ async def connector_sync(request: Request, connector_service, session_manager): connector_type = request.path_params.get("connector_type", "google_drive") data = await request.json() max_files = data.get("max_files") + selected_files = data.get("selected_files") try: user = request.state.user @@ -33,6 +34,7 @@ async def connector_sync(request: Request, connector_service, session_manager): ) active_connections = [conn for conn in connections if conn.is_active] + active_connections = active_connections[:1] # TODO: Temporary workaround for duplicate connections if not active_connections: return JSONResponse( {"error": f"No active {connector_type} connections found"}, @@ -42,12 +44,20 @@ async def connector_sync(request: Request, connector_service, session_manager): # Start sync tasks for all active connections task_ids = [] for connection in active_connections: - task_id = await connector_service.sync_connector_files( - connection.connection_id, - user.user_id, - max_files, - jwt_token=jwt_token, - ) + if selected_files: + task_id = await connector_service.sync_specific_files( + connection.connection_id, + user.user_id, + selected_files, + jwt_token=jwt_token, + ) + else: + task_id = await connector_service.sync_connector_files( + connection.connection_id, + user.user_id, + max_files, + jwt_token=jwt_token, + ) task_ids.append(task_id) return JSONResponse( From efd9e3aad96f282e1ea46e8d1b72b757266f39ce Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Fri, 5 Sep 2025 12:16:43 -0500 Subject: [PATCH 19/32] update be call --- frontend/src/app/connectors/page.tsx | 96 +++++++++++++--- frontend/src/app/upload/[provider]/page.tsx | 106 +++++++++++++++--- .../src/components/google-drive-picker.tsx | 2 +- 3 files changed, 173 insertions(+), 31 deletions(-) diff --git a/frontend/src/app/connectors/page.tsx 
b/frontend/src/app/connectors/page.tsx index 9bf206d7..8011d1bd 100644 --- a/frontend/src/app/connectors/page.tsx +++ b/frontend/src/app/connectors/page.tsx @@ -2,6 +2,7 @@ import React, { useState } from "react"; import { GoogleDrivePicker } from "@/components/google-drive-picker" +import { useTask } from "@/contexts/task-context" interface GoogleDriveFile { id: string; @@ -12,21 +13,63 @@ interface GoogleDriveFile { } export default function ConnectorsPage() { + const { addTask } = useTask() const [selectedFiles, setSelectedFiles] = useState([]); + const [isSyncing, setIsSyncing] = useState(false); + const [syncResult, setSyncResult] = useState(null); const handleFileSelection = (files: GoogleDriveFile[]) => { setSelectedFiles(files); }; - const handleSync = () => { - const fileIds = selectedFiles.map(file => file.id); - const body = { - file_ids: fileIds, - folder_ids: [], // Add folder handling if needed - recursive: true, - }; + const handleSync = async (connector: { connectionId: string, type: string }) => { + if (!connector.connectionId || selectedFiles.length === 0) return - console.log('Syncing with:', body); + setIsSyncing(true) + setSyncResult(null) + + try { + const syncBody: { + connection_id: string; + max_files?: number; + selected_files?: string[]; + } = { + connection_id: connector.connectionId, + selected_files: selectedFiles.map(file => file.id) + } + + const response = await fetch(`/api/connectors/${connector.type}/sync`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(syncBody), + }) + + const result = await response.json() + + if (response.status === 201) { + const taskId = result.task_id + if (taskId) { + addTask(taskId) + setSyncResult({ + processed: 0, + total: selectedFiles.length, + status: 'started' + }) + } + } else if (response.ok) { + setSyncResult(result) + } else { + console.error('Sync failed:', result.error) + setSyncResult({ error: result.error || 'Sync failed' }) + } + } catch (error) { + console.error('Sync error:', error) + setSyncResult({ error: 'Network error occurred' }) + } finally { + setIsSyncing(false) + } }; return ( @@ -48,12 +91,37 @@ export default function ConnectorsPage() {
      {selectedFiles.length > 0 && (
-        <Button onClick={handleSync}>Sync Selected Files</Button>
+        <div>
+          <Button
+            onClick={() => handleSync({ connectionId, type: "google_drive" })}
+            disabled={isSyncing}
+          >
+            {isSyncing ? "Syncing..." : "Sync Selected Files"}
+          </Button>
+
+          {syncResult && (
+            <div>
+              {syncResult.error ? (
+                <div>Error: {syncResult.error}</div>
+              ) : syncResult.status === 'started' ? (
+                <div>
+                  Sync started for {syncResult.total} files. Check the task notification for progress.
+                </div>
+              ) : (
+                <div>
+                  <div>Processed: {syncResult.processed || 0}</div>
+                  <div>Added: {syncResult.added || 0}</div>
+                  {syncResult.errors && <div>Errors: {syncResult.errors}</div>}
+                </div>
+              )}
+            </div>
+          )}
+        </div>
      )}
); diff --git a/frontend/src/app/upload/[provider]/page.tsx b/frontend/src/app/upload/[provider]/page.tsx index f53c1913..c00391f2 100644 --- a/frontend/src/app/upload/[provider]/page.tsx +++ b/frontend/src/app/upload/[provider]/page.tsx @@ -7,6 +7,7 @@ import { Badge } from "@/components/ui/badge" import { ArrowLeft, AlertCircle } from "lucide-react" import { GoogleDrivePicker } from "@/components/google-drive-picker" import { OneDrivePicker } from "@/components/onedrive-picker" +import { useTask } from "@/contexts/task-context" interface GoogleDriveFile { id: string @@ -42,12 +43,15 @@ export default function UploadProviderPage() { const params = useParams() const router = useRouter() const provider = params.provider as string + const { addTask } = useTask() const [connector, setConnector] = useState(null) const [isLoading, setIsLoading] = useState(true) const [error, setError] = useState(null) const [accessToken, setAccessToken] = useState(null) const [selectedFiles, setSelectedFiles] = useState([]) + const [isSyncing, setIsSyncing] = useState(false) + const [syncResult, setSyncResult] = useState(null) useEffect(() => { const fetchConnectorInfo = async () => { @@ -132,6 +136,56 @@ export default function UploadProviderPage() { // You can add additional handling here like triggering sync, etc. } + const handleSync = async (connector: CloudConnector) => { + if (!connector.connectionId || selectedFiles.length === 0) return + + setIsSyncing(true) + setSyncResult(null) + + try { + const syncBody: { + connection_id: string; + max_files?: number; + selected_files?: string[]; + } = { + connection_id: connector.connectionId, + selected_files: selectedFiles.map(file => file.id) + } + + const response = await fetch(`/api/connectors/${connector.type}/sync`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(syncBody), + }) + + const result = await response.json() + + if (response.status === 201) { + const taskId = result.task_id + if (taskId) { + addTask(taskId) + setSyncResult({ + processed: 0, + total: selectedFiles.length, + status: 'started' + }) + } + } else if (response.ok) { + setSyncResult(result) + } else { + console.error('Sync failed:', result.error) + setSyncResult({ error: result.error || 'Sync failed' }) + } + } catch (error) { + console.error('Sync error:', error) + setSyncResult({ error: 'Network error occurred' }) + } finally { + setIsSyncing(false) + } + } + const getProviderDisplayName = () => { const nameMap: { [key: string]: string } = { 'google_drive': 'Google Drive', @@ -284,22 +338,42 @@ export default function UploadProviderPage() {
            {selectedFiles.length > 0 && (
-              <div>
-                <Button>Continue</Button>
-              </div>
+              <div>
+                <Button onClick={() => handleSync(connector)} disabled={isSyncing}>
+                  {isSyncing ? "Syncing..." : "Sync Selected Files"}
+                </Button>
+
+                {syncResult && (
+                  <div>
+                    {syncResult.error ? (
+                      <div>Error: {syncResult.error}</div>
+                    ) : syncResult.status === 'started' ? (
+                      <div>
+                        Sync started for {syncResult.total} files. Check the task notification for progress.
+                      </div>
+                    ) : (
+                      <div>
+                        <div>Processed: {syncResult.processed || 0}</div>
+                        <div>Added: {syncResult.added || 0}</div>
+                        {syncResult.errors && <div>Errors: {syncResult.errors}</div>}
+                      </div>
+                    )}
+                  </div>
+                )}
+              </div>
            )}
diff --git a/frontend/src/components/google-drive-picker.tsx b/frontend/src/components/google-drive-picker.tsx index f173f05f..60191261 100644 --- a/frontend/src/components/google-drive-picker.tsx +++ b/frontend/src/components/google-drive-picker.tsx @@ -241,7 +241,7 @@ export function GoogleDrivePicker({ size="sm" variant="outline" > - {isPickerOpen ? 'Opening Picker...' : 'Select Files'} + {isPickerOpen ? 'Opening Picker...' : 'Add Files'}
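Patch 20, next, swaps the module-level user_conversations dict in agent.py for a disk-backed store so chat threads survive server restarts. A minimal round-trip sketch against the service it adds (the user and response IDs here are illustrative):

    from datetime import datetime
    from services.conversation_persistence_service import conversation_persistence

    state = {
        "messages": [{"role": "user", "content": "hello", "timestamp": datetime.now()}],
        "previous_response_id": None,
        "created_at": datetime.now(),
        "last_activity": datetime.now(),
    }

    # datetime values are recursively serialized to ISO strings before the JSON write
    conversation_persistence.store_conversation_thread("user-123", "resp-abc", state)

    # after a process restart, the thread is reloaded from conversations.json
    thread = conversation_persistence.get_conversation_thread("user-123", "resp-abc")
    assert thread["messages"][0]["content"] == "hello"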
From 18b4059b5687ad1d68a27e262c5652cb145fd9c1 Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Fri, 5 Sep 2025 16:03:06 -0300 Subject: [PATCH 20/32] =?UTF-8?q?=F0=9F=94=A7=20(agent.py):=20Import=20and?= =?UTF-8?q?=20use=20conversation=5Fpersistence=5Fservice=20to=20handle=20u?= =?UTF-8?q?ser=20conversations=20storage=20and=20retrieval=20=F0=9F=94=A7?= =?UTF-8?q?=20(conversation=5Fpersistence=5Fservice.py):=20Create=20a=20se?= =?UTF-8?q?rvice=20to=20persist=20chat=20conversations=20to=20disk=20for?= =?UTF-8?q?=20server=20restarts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/agent.py | 43 +-- src/services/auth_service.py | 34 +-- src/services/chat_service.py | 29 +- .../conversation_persistence_service.py | 126 +++++++++ src/services/langflow_history_service.py | 156 ++--------- src/services/session_ownership_service.py | 78 +++--- src/services/user_binding_service.py | 254 ------------------ 7 files changed, 211 insertions(+), 509 deletions(-) create mode 100644 src/services/conversation_persistence_service.py delete mode 100644 src/services/user_binding_service.py diff --git a/src/agent.py b/src/agent.py index 07fd911e..c1d5b0c4 100644 --- a/src/agent.py +++ b/src/agent.py @@ -2,15 +2,13 @@ from utils.logging_config import get_logger logger = get_logger(__name__) -# User-scoped conversation state - keyed by user_id -> response_id -> conversation -user_conversations = {} # user_id -> {response_id: {"messages": [...], "previous_response_id": parent_id, "created_at": timestamp, "last_activity": timestamp}} +# Import persistent storage +from services.conversation_persistence_service import conversation_persistence def get_user_conversations(user_id: str): """Get all conversations for a user""" - if user_id not in user_conversations: - user_conversations[user_id] = {} - return user_conversations[user_id] + return conversation_persistence.get_user_conversations(user_id) def get_conversation_thread(user_id: str, previous_response_id: str = None): @@ -44,8 +42,7 @@ def get_conversation_thread(user_id: str, previous_response_id: str = None): def store_conversation_thread(user_id: str, response_id: str, conversation_state: dict): """Store a conversation thread with its response_id""" - conversations = get_user_conversations(user_id) - conversations[response_id] = conversation_state + conversation_persistence.store_conversation_thread(user_id, response_id, conversation_state) # Legacy function for backward compatibility @@ -413,17 +410,11 @@ async def async_langflow_chat( conversation_state["last_activity"] = datetime.now() store_conversation_thread(user_id, response_id, conversation_state) - # Claim session ownership if this is a Google user + # Claim session ownership for this user try: from services.session_ownership_service import session_ownership_service - from services.user_binding_service import user_binding_service - - # Check if this is a Google user (Google IDs are numeric, Langflow IDs are UUID) - if user_id.isdigit() and user_binding_service.has_binding(user_id): - langflow_user_id = user_binding_service.get_langflow_user_id(user_id) - if langflow_user_id: - session_ownership_service.claim_session(user_id, response_id, langflow_user_id) - print(f"[DEBUG] Claimed session {response_id} for Google user {user_id}") + session_ownership_service.claim_session(user_id, response_id) + print(f"[DEBUG] Claimed session {response_id} for user {user_id}") except Exception as e: print(f"[WARNING] Failed to claim session ownership: {e}") @@ -502,19 
+493,13 @@ async def async_langflow_chat_stream( conversation_state["last_activity"] = datetime.now() store_conversation_thread(user_id, response_id, conversation_state) - # Claim session ownership if this is a Google user - try: - from services.session_ownership_service import session_ownership_service - from services.user_binding_service import user_binding_service - - # Check if this is a Google user (Google IDs are numeric, Langflow IDs are UUID) - if user_id.isdigit() and user_binding_service.has_binding(user_id): - langflow_user_id = user_binding_service.get_langflow_user_id(user_id) - if langflow_user_id: - session_ownership_service.claim_session(user_id, response_id, langflow_user_id) - print(f"[DEBUG] Claimed session {response_id} for Google user {user_id} (streaming)") - except Exception as e: - print(f"[WARNING] Failed to claim session ownership (streaming): {e}") + # Claim session ownership for this user + try: + from services.session_ownership_service import session_ownership_service + session_ownership_service.claim_session(user_id, response_id) + print(f"[DEBUG] Claimed session {response_id} for user {user_id}") + except Exception as e: + print(f"[WARNING] Failed to claim session ownership: {e}") print( f"[DEBUG] Stored langflow conversation thread for user {user_id} with response_id: {response_id}" diff --git a/src/services/auth_service.py b/src/services/auth_service.py index 3f5d9dea..78c199fd 100644 --- a/src/services/auth_service.py +++ b/src/services/auth_service.py @@ -14,7 +14,6 @@ from connectors.sharepoint.oauth import SharePointOAuth from connectors.google_drive import GoogleDriveConnector from connectors.onedrive import OneDriveConnector from connectors.sharepoint import SharePointConnector -from services.user_binding_service import user_binding_service class AuthService: @@ -268,24 +267,10 @@ class AuthService: ) if jwt_token: - # Get the user info to create a persistent Google Drive connection + # Get the user info to create a persistent connector connection user_info = await self.session_manager.get_user_info_from_token( token_data["access_token"] ) - google_user_id = user_info["id"] if user_info else None - - # Create or update user binding between Google ID and Langflow ID - if google_user_id and user_info: - try: - print(f"[DEBUG] Creating/updating user binding for Google ID: {google_user_id}") - binding_created = await user_binding_service.ensure_binding(google_user_id, user_info) - if binding_created: - print(f"[DEBUG] Successfully ensured user binding for Google ID: {google_user_id}") - else: - print(f"[DEBUG] Failed to create user binding for Google ID: {google_user_id}") - except Exception as e: - print(f"[WARNING] Failed to create user binding for Google ID {google_user_id}: {e}") - # Don't fail authentication if binding creation fails response_data = { "status": "authenticated", @@ -294,13 +279,13 @@ class AuthService: "jwt_token": jwt_token, # Include JWT token in response } - if google_user_id: - # Convert the temporary auth connection to a persistent Google Drive connection + if user_info and user_info.get("id"): + # Convert the temporary auth connection to a persistent OAuth connection await self.connector_service.connection_manager.update_connection( connection_id=connection_id, connector_type="google_drive", name=f"Google Drive ({user_info.get('email', 'Unknown')})", - user_id=google_user_id, + user_id=user_info.get("id"), config={ **connection_config.config, "purpose": "data_source", @@ -349,10 +334,6 @@ class AuthService: user = 
getattr(request.state, "user", None) if user: - # Get user binding info if available - binding_info = user_binding_service.get_binding_info(user.user_id) - langflow_user_id = user_binding_service.get_langflow_user_id(user.user_id) - user_data = { "authenticated": True, "user": { @@ -367,13 +348,6 @@ class AuthService: }, } - # Add binding information if available - if langflow_user_id: - user_data["user"]["langflow_user_id"] = langflow_user_id - if binding_info: - user_data["user"]["binding_created_at"] = binding_info.get("created_at") - user_data["user"]["binding_last_updated"] = binding_info.get("last_updated") - return user_data else: return {"authenticated": False, "user": None} diff --git a/src/services/chat_service.py b/src/services/chat_service.py index 93fddcc8..abddec7b 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -269,7 +269,6 @@ class ChatService: """Get langflow conversation history for a user - now fetches from both OpenRAG memory and Langflow database""" from agent import get_user_conversations from services.langflow_history_service import langflow_history_service - from services.user_binding_service import user_binding_service if not user_id: return {"error": "User ID is required", "conversations": []} @@ -285,12 +284,17 @@ class ChatService: messages = [] for msg in conversation_state.get("messages", []): if msg.get("role") in ["user", "assistant"]: + # Handle timestamp - could be datetime object or string + timestamp = msg.get("timestamp") + if timestamp: + if hasattr(timestamp, 'isoformat'): + timestamp = timestamp.isoformat() + # else it's already a string + message_data = { "role": msg["role"], "content": msg["content"], - "timestamp": msg.get("timestamp").isoformat() - if msg.get("timestamp") - else None, + "timestamp": timestamp, } if msg.get("response_id"): message_data["response_id"] = msg["response_id"] @@ -309,17 +313,22 @@ class ChatService: else "New chat" ) + # Handle conversation timestamps - could be datetime objects or strings + created_at = conversation_state.get("created_at") + if created_at and hasattr(created_at, 'isoformat'): + created_at = created_at.isoformat() + + last_activity = conversation_state.get("last_activity") + if last_activity and hasattr(last_activity, 'isoformat'): + last_activity = last_activity.isoformat() + all_conversations.append({ "response_id": response_id, "title": title, "endpoint": "langflow", "messages": messages, - "created_at": conversation_state.get("created_at").isoformat() - if conversation_state.get("created_at") - else None, - "last_activity": conversation_state.get("last_activity").isoformat() - if conversation_state.get("last_activity") - else None, + "created_at": created_at, + "last_activity": last_activity, "previous_response_id": conversation_state.get("previous_response_id"), "total_messages": len(messages), "source": "openrag_memory" diff --git a/src/services/conversation_persistence_service.py b/src/services/conversation_persistence_service.py new file mode 100644 index 00000000..1b37eb4e --- /dev/null +++ b/src/services/conversation_persistence_service.py @@ -0,0 +1,126 @@ +""" +Conversation Persistence Service +Simple service to persist chat conversations to disk so they survive server restarts +""" + +import json +import os +from typing import Dict, Any +from datetime import datetime +import threading + + +class ConversationPersistenceService: + """Simple service to persist conversations to disk""" + + def __init__(self, storage_file: str = "conversations.json"): + 
self.storage_file = storage_file + self.lock = threading.Lock() + self._conversations = self._load_conversations() + + def _load_conversations(self) -> Dict[str, Dict[str, Any]]: + """Load conversations from disk""" + if os.path.exists(self.storage_file): + try: + with open(self.storage_file, 'r', encoding='utf-8') as f: + data = json.load(f) + print(f"Loaded {self._count_total_conversations(data)} conversations from {self.storage_file}") + return data + except Exception as e: + print(f"Error loading conversations from {self.storage_file}: {e}") + return {} + return {} + + def _save_conversations(self): + """Save conversations to disk""" + try: + with self.lock: + with open(self.storage_file, 'w', encoding='utf-8') as f: + json.dump(self._conversations, f, indent=2, ensure_ascii=False, default=str) + print(f"Saved {self._count_total_conversations(self._conversations)} conversations to {self.storage_file}") + except Exception as e: + print(f"Error saving conversations to {self.storage_file}: {e}") + + def _count_total_conversations(self, data: Dict[str, Any]) -> int: + """Count total conversations across all users""" + total = 0 + for user_conversations in data.values(): + if isinstance(user_conversations, dict): + total += len(user_conversations) + return total + + def get_user_conversations(self, user_id: str) -> Dict[str, Any]: + """Get all conversations for a user""" + if user_id not in self._conversations: + self._conversations[user_id] = {} + return self._conversations[user_id] + + def _serialize_datetime(self, obj: Any) -> Any: + """Recursively convert datetime objects to ISO strings for JSON serialization""" + if isinstance(obj, datetime): + return obj.isoformat() + elif isinstance(obj, dict): + return {key: self._serialize_datetime(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [self._serialize_datetime(item) for item in obj] + else: + return obj + + def store_conversation_thread(self, user_id: str, response_id: str, conversation_state: Dict[str, Any]): + """Store a conversation thread and persist to disk""" + if user_id not in self._conversations: + self._conversations[user_id] = {} + + # Recursively convert datetime objects to strings for JSON serialization + serialized_conversation = self._serialize_datetime(conversation_state) + + self._conversations[user_id][response_id] = serialized_conversation + + # Save to disk (we could optimize this with batching if needed) + self._save_conversations() + + def get_conversation_thread(self, user_id: str, response_id: str) -> Dict[str, Any]: + """Get a specific conversation thread""" + user_conversations = self.get_user_conversations(user_id) + return user_conversations.get(response_id, {}) + + def delete_conversation_thread(self, user_id: str, response_id: str): + """Delete a specific conversation thread""" + if user_id in self._conversations and response_id in self._conversations[user_id]: + del self._conversations[user_id][response_id] + self._save_conversations() + print(f"Deleted conversation {response_id} for user {user_id}") + + def clear_user_conversations(self, user_id: str): + """Clear all conversations for a user""" + if user_id in self._conversations: + del self._conversations[user_id] + self._save_conversations() + print(f"Cleared all conversations for user {user_id}") + + def get_storage_stats(self) -> Dict[str, Any]: + """Get statistics about stored conversations""" + total_users = len(self._conversations) + total_conversations = self._count_total_conversations(self._conversations) + + 
user_stats = {} + for user_id, conversations in self._conversations.items(): + user_stats[user_id] = { + 'conversation_count': len(conversations), + 'latest_activity': max( + (conv.get('last_activity', '') for conv in conversations.values()), + default='' + ) + } + + return { + 'total_users': total_users, + 'total_conversations': total_conversations, + 'storage_file': self.storage_file, + 'file_exists': os.path.exists(self.storage_file), + 'user_stats': user_stats + } + + +# Global instance +conversation_persistence = ConversationPersistenceService() \ No newline at end of file diff --git a/src/services/langflow_history_service.py b/src/services/langflow_history_service.py index ad17a238..283ddf85 100644 --- a/src/services/langflow_history_service.py +++ b/src/services/langflow_history_service.py @@ -1,72 +1,33 @@ """ Langflow Message History Service -Retrieves message history from Langflow's database using user bindings +Simplified service that retrieves message history from Langflow using a single token """ -import asyncio import httpx from typing import List, Dict, Optional, Any -from datetime import datetime from config.settings import LANGFLOW_URL, LANGFLOW_KEY, LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD -from services.user_binding_service import user_binding_service -from services.session_ownership_service import session_ownership_service class LangflowHistoryService: - """Service to retrieve message history from Langflow using user bindings""" + """Simplified service to retrieve message history from Langflow""" def __init__(self): self.langflow_url = LANGFLOW_URL self.auth_token = None - def _resolve_langflow_user_id(self, user_id: str) -> Optional[str]: - """Resolve user_id to Langflow user ID - - Args: - user_id: Either Google user ID or direct Langflow user ID - - Returns: - Langflow user ID or None - """ - # First, check if this is already a Langflow user ID by checking UUID format - if self._is_uuid_format(user_id): - print(f"User ID {user_id} appears to be a Langflow UUID, using directly") - return user_id - - # Otherwise, try to get Langflow user ID from Google binding - langflow_user_id = user_binding_service.get_langflow_user_id(user_id) - if langflow_user_id: - print(f"Found Langflow binding for Google user {user_id}: {langflow_user_id}") - return langflow_user_id - - print(f"No Langflow user ID found for {user_id}") - return None - - def _is_uuid_format(self, user_id: str) -> bool: - """Check if string looks like a UUID (Langflow user ID format vs Google numeric ID)""" - # Langflow IDs are UUID v4, Google IDs are purely numeric - return not user_id.isdigit() - - def _filter_sessions_by_ownership(self, session_ids: List[str], user_id: str, langflow_user_id: str) -> List[str]: - """Filter sessions based on user type and ownership""" - if self._is_uuid_format(user_id): - # Direct Langflow user - show all sessions for this Langflow user - print(f"[DEBUG] Direct Langflow user - showing all {len(session_ids)} sessions") - return session_ids - else: - # Google OAuth user - only show sessions they own - owned_sessions = session_ownership_service.filter_sessions_for_google_user(session_ids, user_id) - print(f"[DEBUG] Google user {user_id} owns {len(owned_sessions)} out of {len(session_ids)} total sessions") - return owned_sessions - async def _authenticate(self) -> Optional[str]: """Authenticate with Langflow and get access token""" if self.auth_token: return self.auth_token + # Try using LANGFLOW_KEY first if available + if LANGFLOW_KEY: + self.auth_token = LANGFLOW_KEY + 
return self.auth_token + if not all([LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD]): - print("Missing Langflow superuser credentials") + print("Missing Langflow credentials") return None try: @@ -98,15 +59,8 @@ class LangflowHistoryService: async def get_user_sessions(self, user_id: str, flow_id: Optional[str] = None) -> List[str]: """Get all session IDs for a user's conversations - Args: - user_id: Either Google user ID or direct Langflow user ID + Since we use one Langflow token, we get all sessions and filter by user_id locally """ - # Determine the Langflow user ID - langflow_user_id = self._resolve_langflow_user_id(user_id) - if not langflow_user_id: - print(f"No Langflow user found for user: {user_id}") - return [] - token = await self._authenticate() if not token: return [] @@ -127,15 +81,11 @@ class LangflowHistoryService: if response.status_code == 200: session_ids = response.json() + print(f"Found {len(session_ids)} total sessions from Langflow") - # Filter sessions to only include those belonging to the user - user_sessions = await self._filter_sessions_by_user(session_ids, langflow_user_id, token) - - # Apply ownership-based filtering for Google users - filtered_sessions = self._filter_sessions_by_ownership(user_sessions, user_id, langflow_user_id) - - print(f"Found {len(filtered_sessions)} sessions for user {user_id} (Langflow ID: {langflow_user_id})") - return filtered_sessions + # Since we use a single Langflow instance, return all sessions + # Session filtering is handled by user_id at the application level + return session_ids else: print(f"Failed to get sessions: {response.status_code} - {response.text}") return [] @@ -144,65 +94,8 @@ class LangflowHistoryService: print(f"Error getting user sessions: {e}") return [] - async def _filter_sessions_by_user(self, session_ids: List[str], langflow_user_id: str, token: str) -> List[str]: - """Filter session IDs to only include those belonging to the specified user""" - user_sessions = [] - - try: - headers = {"Authorization": f"Bearer {token}"} - - async with httpx.AsyncClient() as client: - for session_id in session_ids: - # Get a sample message from this session to check flow ownership - response = await client.get( - f"{self.langflow_url.rstrip('/')}/api/v1/monitor/messages", - headers=headers, - params={ - "session_id": session_id, - "order_by": "timestamp" - } - ) - - if response.status_code == 200: - messages = response.json() - if messages and len(messages) > 0: - # Check if this session belongs to the user via flow ownership - flow_id = messages[0].get('flow_id') - if flow_id and await self._is_user_flow(flow_id, langflow_user_id, token): - user_sessions.append(session_id) - - except Exception as e: - print(f"Error filtering sessions by user: {e}") - - return user_sessions - - async def _is_user_flow(self, flow_id: str, langflow_user_id: str, token: str) -> bool: - """Check if a flow belongs to the specified user""" - try: - headers = {"Authorization": f"Bearer {token}"} - - async with httpx.AsyncClient() as client: - response = await client.get( - f"{self.langflow_url.rstrip('/')}/api/v1/flows/{flow_id}", - headers=headers - ) - - if response.status_code == 200: - flow_data = response.json() - return flow_data.get('user_id') == langflow_user_id - - except Exception as e: - print(f"Error checking flow ownership: {e}") - - return False - async def get_session_messages(self, user_id: str, session_id: str) -> List[Dict[str, Any]]: """Get all messages for a specific session""" - # Verify user has access to this session - 
langflow_user_id = self._resolve_langflow_user_id(user_id) - if not langflow_user_id: - return [] - token = await self._authenticate() if not token: return [] @@ -222,14 +115,6 @@ class LangflowHistoryService: if response.status_code == 200: messages = response.json() - - # Verify user owns this session (security check) - if messages and len(messages) > 0: - flow_id = messages[0].get('flow_id') - if not await self._is_user_flow(flow_id, langflow_user_id, token): - print(f"User {user_id} does not own session {session_id}") - return [] - # Convert to OpenRAG format return self._convert_langflow_messages(messages) else: @@ -270,16 +155,12 @@ class LangflowHistoryService: return converted_messages async def get_user_conversation_history(self, user_id: str, flow_id: Optional[str] = None) -> Dict[str, Any]: - """Get all conversation history for a user, organized by session""" - langflow_user_id = self._resolve_langflow_user_id(user_id) - if not langflow_user_id: - return { - "error": f"No Langflow user found for {user_id}", - "conversations": [] - } - + """Get all conversation history for a user, organized by session + + Simplified version - gets all sessions and lets the frontend filter by user_id + """ try: - # Get all user sessions + # Get all sessions (no complex filtering needed) session_ids = await self.get_user_sessions(user_id, flow_id) conversations = [] @@ -309,7 +190,6 @@ class LangflowHistoryService: return { "conversations": conversations, "total_conversations": len(conversations), - "langflow_user_id": langflow_user_id, "user_id": user_id } diff --git a/src/services/session_ownership_service.py b/src/services/session_ownership_service.py index b3a214d9..9e3677fd 100644 --- a/src/services/session_ownership_service.py +++ b/src/services/session_ownership_service.py @@ -1,16 +1,16 @@ """ Session Ownership Service -Tracks which Google user owns which Langflow session to properly separate message history +Simple service that tracks which user owns which session """ import json import os -from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional from datetime import datetime class SessionOwnershipService: - """Service to track session ownership for proper message history separation""" + """Simple service to track which user owns which session""" def __init__(self): self.ownership_file = "session_ownership.json" @@ -36,73 +36,55 @@ class SessionOwnershipService: except Exception as e: print(f"Error saving session ownership data: {e}") - def claim_session(self, google_user_id: str, langflow_session_id: str, langflow_user_id: str): - """Claim a Langflow session for a Google user""" - if langflow_session_id not in self.ownership_data: - self.ownership_data[langflow_session_id] = { - "google_user_id": google_user_id, - "langflow_user_id": langflow_user_id, + def claim_session(self, user_id: str, session_id: str): + """Claim a session for a user""" + if session_id not in self.ownership_data: + self.ownership_data[session_id] = { + "user_id": user_id, "created_at": datetime.now().isoformat(), "last_accessed": datetime.now().isoformat() } self._save_ownership_data() - print(f"Claimed session {langflow_session_id} for Google user {google_user_id}") + print(f"Claimed session {session_id} for user {user_id}") else: # Update last accessed time - self.ownership_data[langflow_session_id]["last_accessed"] = datetime.now().isoformat() + self.ownership_data[session_id]["last_accessed"] = datetime.now().isoformat() self._save_ownership_data() - def get_session_owner(self, 
langflow_session_id: str) -> Optional[str]: - """Get the Google user ID that owns a Langflow session""" - session_data = self.ownership_data.get(langflow_session_id) - return session_data.get("google_user_id") if session_data else None + def get_session_owner(self, session_id: str) -> Optional[str]: + """Get the user ID that owns a session""" + session_data = self.ownership_data.get(session_id) + return session_data.get("user_id") if session_data else None - def get_user_sessions(self, google_user_id: str) -> List[str]: - """Get all Langflow sessions owned by a Google user""" + def get_user_sessions(self, user_id: str) -> List[str]: + """Get all sessions owned by a user""" return [ session_id for session_id, session_data in self.ownership_data.items() - if session_data.get("google_user_id") == google_user_id + if session_data.get("user_id") == user_id ] - def get_unowned_sessions_for_langflow_user(self, langflow_user_id: str) -> Set[str]: - """Get sessions for a Langflow user that aren't claimed by any Google user - - This requires querying the Langflow database to get all sessions for the user, - then filtering out the ones that are already claimed. - """ - # This will be implemented when we have access to all sessions for a Langflow user - claimed_sessions = set() - for session_data in self.ownership_data.values(): - if session_data.get("langflow_user_id") == langflow_user_id: - claimed_sessions.add(session_data.get("google_user_id")) - return claimed_sessions + def is_session_owned_by_user(self, session_id: str, user_id: str) -> bool: + """Check if a session is owned by a specific user""" + return self.get_session_owner(session_id) == user_id - def filter_sessions_for_google_user(self, all_sessions: List[str], google_user_id: str) -> List[str]: - """Filter a list of sessions to only include those owned by the Google user""" - user_sessions = self.get_user_sessions(google_user_id) - return [session for session in all_sessions if session in user_sessions] - - def is_session_owned_by_google_user(self, langflow_session_id: str, google_user_id: str) -> bool: - """Check if a session is owned by a specific Google user""" - return self.get_session_owner(langflow_session_id) == google_user_id + def filter_sessions_for_user(self, session_ids: List[str], user_id: str) -> List[str]: + """Filter a list of sessions to only include those owned by the user""" + user_sessions = self.get_user_sessions(user_id) + return [session for session in session_ids if session in user_sessions] def get_ownership_stats(self) -> Dict[str, any]: """Get statistics about session ownership""" - google_users = set() - langflow_users = set() - + users = set() for session_data in self.ownership_data.values(): - google_users.add(session_data.get("google_user_id")) - langflow_users.add(session_data.get("langflow_user_id")) + users.add(session_data.get("user_id")) return { "total_tracked_sessions": len(self.ownership_data), - "unique_google_users": len(google_users), - "unique_langflow_users": len(langflow_users), - "sessions_per_google_user": { - google_user: len(self.get_user_sessions(google_user)) - for google_user in google_users + "unique_users": len(users), + "sessions_per_user": { + user: len(self.get_user_sessions(user)) + for user in users if user } } diff --git a/src/services/user_binding_service.py b/src/services/user_binding_service.py deleted file mode 100644 index b7bbe905..00000000 --- a/src/services/user_binding_service.py +++ /dev/null @@ -1,254 +0,0 @@ -""" -User Binding Service -Manages mappings between 
Google OAuth user IDs and Langflow user IDs -Uses verified Langflow API endpoints: /api/v1/login and /api/v1/users/whoami -""" - -import json -import os -from typing import Dict, Optional, Any -import httpx -from config.settings import LANGFLOW_URL, LANGFLOW_KEY - -USER_BINDINGS_FILE = "user_bindings.json" - -class UserBindingService: - def __init__(self): - self.bindings_file = USER_BINDINGS_FILE - self.bindings = self._load_bindings() - - def _load_bindings(self) -> Dict[str, Any]: - """Load user bindings from JSON file""" - try: - if os.path.exists(self.bindings_file): - with open(self.bindings_file, 'r') as f: - return json.load(f) - else: - return {} - except Exception as e: - print(f"Error loading user bindings: {e}") - return {} - - def _save_bindings(self): - """Save user bindings to JSON file""" - try: - with open(self.bindings_file, 'w') as f: - json.dump(self.bindings, f, indent=2) - print(f"Saved user bindings to {self.bindings_file}") - except Exception as e: - print(f"Error saving user bindings: {e}") - - def get_langflow_user_id(self, google_user_id: str) -> Optional[str]: - """Get Langflow user ID from Google user ID""" - return self.bindings.get(google_user_id, {}).get('langflow_user_id') - - def get_google_user_id(self, langflow_user_id: str) -> Optional[str]: - """Get Google user ID from Langflow user ID (reverse lookup)""" - for google_id, binding in self.bindings.items(): - if binding.get('langflow_user_id') == langflow_user_id: - return google_id - return None - - def create_binding(self, google_user_id: str, langflow_user_id: str, google_user_info: Dict[str, Any]): - """Create a new binding between Google and Langflow user IDs""" - self.bindings[google_user_id] = { - 'langflow_user_id': langflow_user_id, - 'google_user_info': { - 'email': google_user_info.get('email'), - 'name': google_user_info.get('name'), - 'picture': google_user_info.get('picture'), - 'verified_email': google_user_info.get('verified_email') - }, - 'created_at': __import__('datetime').datetime.now().isoformat(), - 'last_updated': __import__('datetime').datetime.now().isoformat() - } - self._save_bindings() - print(f"Created binding: Google ID {google_user_id} -> Langflow ID {langflow_user_id}") - - def update_binding(self, google_user_id: str, google_user_info: Dict[str, Any]): - """Update existing binding with fresh Google user info""" - if google_user_id in self.bindings: - self.bindings[google_user_id]['google_user_info'] = { - 'email': google_user_info.get('email'), - 'name': google_user_info.get('name'), - 'picture': google_user_info.get('picture'), - 'verified_email': google_user_info.get('verified_email') - } - self.bindings[google_user_id]['last_updated'] = __import__('datetime').datetime.now().isoformat() - self._save_bindings() - print(f"Updated binding for Google ID {google_user_id}") - - def has_binding(self, google_user_id: str) -> bool: - """Check if a binding exists for the Google user ID""" - return google_user_id in self.bindings - - async def get_langflow_user_info(self, langflow_access_token: str) -> Optional[Dict[str, Any]]: - """Get current user info from Langflow /me endpoint""" - if not LANGFLOW_URL: - print("LANGFLOW_URL not configured") - return None - - try: - # Use the correct Langflow endpoint based on source code analysis - endpoint = "/api/v1/users/whoami" - - headers = {} - if langflow_access_token: - headers["Authorization"] = f"Bearer {langflow_access_token}" - elif LANGFLOW_KEY: - # Try with global Langflow API key if available - headers["Authorization"] = 
f"Bearer {LANGFLOW_KEY}" - headers["x-api-key"] = LANGFLOW_KEY - - async with httpx.AsyncClient() as client: - url = f"{LANGFLOW_URL.rstrip('/')}{endpoint}" - print(f"Getting Langflow user info from: {url}") - - response = await client.get(url, headers=headers) - - if response.status_code == 200: - user_data = response.json() - print(f"Successfully got Langflow user data") - return user_data - else: - print(f"Langflow /whoami endpoint returned: {response.status_code} - {response.text}") - return None - - except Exception as e: - print(f"Error getting Langflow user info: {e}") - return None - - async def authenticate_with_langflow(self) -> Optional[str]: - """Authenticate with Langflow using superuser credentials to get access token""" - if not LANGFLOW_URL: - return None - - try: - from config.settings import LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD - - if not LANGFLOW_SUPERUSER or not LANGFLOW_SUPERUSER_PASSWORD: - print("Langflow superuser credentials not configured") - return None - - # Try to login to Langflow - login_data = { - "username": LANGFLOW_SUPERUSER, - "password": LANGFLOW_SUPERUSER_PASSWORD - } - - async with httpx.AsyncClient() as client: - # Use the correct Langflow login endpoint based on source code analysis - endpoint = "/api/v1/login" - url = f"{LANGFLOW_URL.rstrip('/')}{endpoint}" - - # Try form-encoded data first (standard OAuth2 flow) - try: - response = await client.post( - url, - data=login_data, - headers={"Content-Type": "application/x-www-form-urlencoded"} - ) - - if response.status_code == 200: - result = response.json() - access_token = result.get('access_token') - if access_token: - print(f"Successfully authenticated with Langflow via {endpoint}") - return access_token - else: - print(f"Langflow login returned: {response.status_code} - {response.text}") - - except Exception as e: - print(f"Error with form login: {e}") - - # If form login didn't work, try JSON (fallback) - try: - response = await client.post( - url, - json=login_data, - headers={"Content-Type": "application/json"} - ) - - if response.status_code == 200: - result = response.json() - access_token = result.get('access_token') - if access_token: - print(f"Successfully authenticated with Langflow via {endpoint} (JSON)") - return access_token - else: - print(f"Langflow login (JSON) returned: {response.status_code} - {response.text}") - - except Exception as e: - print(f"Error with JSON login: {e}") - - print("Failed to authenticate with Langflow") - return None - - except Exception as e: - print(f"Error authenticating with Langflow: {e}") - return None - - async def ensure_binding(self, google_user_id: str, google_user_info: Dict[str, Any]) -> bool: - """Ensure a binding exists for the Google user, create if needed""" - if self.has_binding(google_user_id): - # Update existing binding with fresh Google info - self.update_binding(google_user_id, google_user_info) - return True - - # No binding exists, try to create one - try: - # First authenticate with Langflow - langflow_token = await self.authenticate_with_langflow() - if not langflow_token: - print("Could not authenticate with Langflow to create binding") - return False - - # Get Langflow user info - langflow_user_info = await self.get_langflow_user_info(langflow_token) - if not langflow_user_info: - print("Could not get Langflow user info") - return False - - # Extract Langflow user ID (try different possible fields) - langflow_user_id = None - for id_field in ['id', 'user_id', 'sub', 'username']: - if id_field in langflow_user_info: - 
langflow_user_id = str(langflow_user_info[id_field]) - break - - if not langflow_user_id: - print(f"Could not extract Langflow user ID from: {langflow_user_info}") - return False - - # Create the binding - self.create_binding(google_user_id, langflow_user_id, google_user_info) - return True - - except Exception as e: - print(f"Error creating binding for Google user {google_user_id}: {e}") - return False - - def get_binding_info(self, google_user_id: str) -> Optional[Dict[str, Any]]: - """Get complete binding information for a Google user ID""" - return self.bindings.get(google_user_id) - - def list_all_bindings(self) -> Dict[str, Any]: - """Get all user bindings (for admin purposes)""" - return self.bindings.copy() - - def is_langflow_user_id(self, user_id: str) -> bool: - """Check if user_id appears to be a Langflow UUID (vs Google numeric ID)""" - # Langflow IDs are UUID v4, Google IDs are purely numeric - return not user_id.isdigit() - - def get_user_type(self, user_id: str) -> str: - """Determine user type: 'google_oauth', 'langflow_direct', or 'unknown'""" - if self.has_binding(user_id): - return "google_oauth" - elif self.is_langflow_user_id(user_id): - return "langflow_direct" - else: - return "unknown" - -# Global instance -user_binding_service = UserBindingService() \ No newline at end of file From f83851b25952b978cc22765b93a4a57eea520d82 Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Fri, 5 Sep 2025 16:53:02 -0300 Subject: [PATCH 21/32] =?UTF-8?q?=E2=9C=A8=20(frontend):=20refactor=20mess?= =?UTF-8?q?age=20processing=20in=20ChatPage=20component=20to=20handle=20fu?= =?UTF-8?q?nction=20calls=20from=20chunks=20or=20response=5Fdata=20?= =?UTF-8?q?=E2=99=BB=EF=B8=8F=20(agent.py):=20refactor=20async=5Fresponse,?= =?UTF-8?q?=20async=5Flangflow,=20async=5Fchat,=20async=5Flangflow=5Fchat,?= =?UTF-8?q?=20and=20async=5Flangflow=5Fchat=5Fstream=20functions=20to=20re?= =?UTF-8?q?turn=20full=20response=20object=20for=20function=20calls=20?= =?UTF-8?q?=F0=9F=94=A7=20(chat=5Fservice.py):=20update=20ChatService=20to?= =?UTF-8?q?=20include=20function=20call=20data=20in=20message=5Fdata=20if?= =?UTF-8?q?=20present?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/app/chat/page.tsx | 99 +++++++++++++++++++++++++++++++--- src/agent.py | 21 +++++--- src/services/chat_service.py | 14 +++++ 3 files changed, 121 insertions(+), 13 deletions(-) diff --git a/frontend/src/app/chat/page.tsx b/frontend/src/app/chat/page.tsx index 19703214..a38d8b32 100644 --- a/frontend/src/app/chat/page.tsx +++ b/frontend/src/app/chat/page.tsx @@ -454,12 +454,99 @@ function ChatPage() { content: string; timestamp?: string; response_id?: string; - }) => ({ - role: msg.role as "user" | "assistant", - content: msg.content, - timestamp: new Date(msg.timestamp || new Date()), - // Add any other necessary properties - }) + chunks?: any[]; + response_data?: any; + }) => { + const message: Message = { + role: msg.role as "user" | "assistant", + content: msg.content, + timestamp: new Date(msg.timestamp || new Date()), + }; + + // Extract function calls from chunks or response_data + if (msg.role === "assistant" && (msg.chunks || msg.response_data)) { + const functionCalls: FunctionCall[] = []; + console.log("Processing assistant message for function calls:", { + hasChunks: !!msg.chunks, + chunksLength: msg.chunks?.length, + hasResponseData: !!msg.response_data, + }); + + // Process chunks (streaming data) + if (msg.chunks && Array.isArray(msg.chunks)) { + for (const chunk of 
msg.chunks) { + // Handle Langflow format: chunks[].item.tool_call + if (chunk.item && chunk.item.type === "tool_call") { + const toolCall = chunk.item; + console.log("Found Langflow tool call:", toolCall); + functionCalls.push({ + id: toolCall.id, + name: toolCall.tool_name, + arguments: toolCall.inputs || {}, + argumentsString: JSON.stringify(toolCall.inputs || {}), + result: toolCall.results, + status: toolCall.status || "completed", + type: "tool_call", + }); + } + // Handle OpenAI format: chunks[].delta.tool_calls + else if (chunk.delta?.tool_calls) { + for (const toolCall of chunk.delta.tool_calls) { + if (toolCall.function) { + functionCalls.push({ + id: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments ? JSON.parse(toolCall.function.arguments) : {}, + argumentsString: toolCall.function.arguments, + status: "completed", + type: toolCall.type || "function", + }); + } + } + } + // Process tool call results from chunks + if (chunk.type === "response.tool_call.result" || chunk.type === "tool_call_result") { + const lastCall = functionCalls[functionCalls.length - 1]; + if (lastCall) { + lastCall.result = chunk.result || chunk; + lastCall.status = "completed"; + } + } + } + } + + // Process response_data (non-streaming data) + if (msg.response_data && typeof msg.response_data === 'object') { + // Look for tool_calls in various places in the response data + const responseData = typeof msg.response_data === 'string' ? JSON.parse(msg.response_data) : msg.response_data; + + if (responseData.tool_calls && Array.isArray(responseData.tool_calls)) { + for (const toolCall of responseData.tool_calls) { + functionCalls.push({ + id: toolCall.id, + name: toolCall.function?.name || toolCall.name, + arguments: toolCall.function?.arguments || toolCall.arguments, + argumentsString: typeof (toolCall.function?.arguments || toolCall.arguments) === 'string' + ? 
toolCall.function?.arguments || toolCall.arguments + : JSON.stringify(toolCall.function?.arguments || toolCall.arguments), + result: toolCall.result, + status: "completed", + type: toolCall.type || "function", + }); + } + } + } + + if (functionCalls.length > 0) { + console.log("Setting functionCalls on message:", functionCalls); + message.functionCalls = functionCalls; + } else { + console.log("No function calls found in message"); + } + } + + return message; + } ); setMessages(convertedMessages); diff --git a/src/agent.py b/src/agent.py index 315bc769..0976c0d1 100644 --- a/src/agent.py +++ b/src/agent.py @@ -180,7 +180,7 @@ async def async_response( response, "response_id", None ) - return response_text, response_id + return response_text, response_id, response # Unified streaming function for both chat and langflow @@ -211,7 +211,7 @@ async def async_langflow( extra_headers: dict = None, previous_response_id: str = None, ): - response_text, response_id = await async_response( + response_text, response_id, response_obj = await async_response( langflow_client, prompt, flow_id, @@ -281,7 +281,7 @@ async def async_chat( "Added user message", message_count=len(conversation_state["messages"]) ) - response_text, response_id = await async_response( + response_text, response_id, response_obj = await async_response( async_client, prompt, model, @@ -292,12 +292,13 @@ async def async_chat( "Got response", response_preview=response_text[:50], response_id=response_id ) - # Add assistant response to conversation with response_id and timestamp + # Add assistant response to conversation with response_id, timestamp, and full response object assistant_message = { "role": "assistant", "content": response_text, "response_id": response_id, "timestamp": datetime.now(), + "response_data": response_obj.model_dump() if hasattr(response_obj, "model_dump") else str(response_obj), # Store complete response for function calls } conversation_state["messages"].append(assistant_message) logger.debug( @@ -419,7 +420,7 @@ async def async_langflow_chat( message_count=len(conversation_state["messages"]), ) - response_text, response_id = await async_response( + response_text, response_id, response_obj = await async_response( langflow_client, prompt, flow_id, @@ -433,12 +434,13 @@ async def async_langflow_chat( response_id=response_id, ) - # Add assistant response to conversation with response_id and timestamp + # Add assistant response to conversation with response_id, timestamp, and full response object assistant_message = { "role": "assistant", "content": response_text, "response_id": response_id, "timestamp": datetime.now(), + "response_data": response_obj.model_dump() if hasattr(response_obj, "model_dump") else str(response_obj), # Store complete response for function calls } conversation_state["messages"].append(assistant_message) logger.debug( @@ -504,6 +506,8 @@ async def async_langflow_chat_stream( full_response = "" response_id = None + collected_chunks = [] # Store all chunks for function call data + async for chunk in async_stream( langflow_client, prompt, @@ -517,6 +521,8 @@ async def async_langflow_chat_stream( import json chunk_data = json.loads(chunk.decode("utf-8")) + collected_chunks.append(chunk_data) # Collect all chunk data + if "delta" in chunk_data and "content" in chunk_data["delta"]: full_response += chunk_data["delta"]["content"] # Extract response_id from chunk @@ -528,13 +534,14 @@ async def async_langflow_chat_stream( pass yield chunk - # Add the complete assistant response to message history 
with response_id and timestamp + # Add the complete assistant response to message history with response_id, timestamp, and function call data if full_response: assistant_message = { "role": "assistant", "content": full_response, "response_id": response_id, "timestamp": datetime.now(), + "chunks": collected_chunks, # Store complete chunk data for function calls } conversation_state["messages"].append(assistant_message) diff --git a/src/services/chat_service.py b/src/services/chat_service.py index 76fc7f47..556decc8 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -226,6 +226,13 @@ class ChatService: } if msg.get("response_id"): message_data["response_id"] = msg["response_id"] + + # Include function call data if present + if msg.get("chunks"): + message_data["chunks"] = msg["chunks"] + if msg.get("response_data"): + message_data["response_data"] = msg["response_data"] + messages.append(message_data) if messages: # Only include conversations with actual messages @@ -305,6 +312,13 @@ class ChatService: } if msg.get("response_id"): message_data["response_id"] = msg["response_id"] + + # Include function call data if present + if msg.get("chunks"): + message_data["chunks"] = msg["chunks"] + if msg.get("response_data"): + message_data["response_data"] = msg["response_data"] + messages.append(message_data) if messages: # Only include conversations with actual messages From 1d3c5459d27e1c0e1c11c6ec045f4214511c2036 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Fri, 5 Sep 2025 16:12:48 -0500 Subject: [PATCH 22/32] ingesting polish --- frontend/src/app/upload/[provider]/page.tsx | 111 +++++++-------- .../src/components/google-drive-picker.tsx | 130 ++++++++++++------ frontend/src/components/onedrive-picker.tsx | 6 +- frontend/src/components/ui/toast.tsx | 39 ++++++ 4 files changed, 183 insertions(+), 103 deletions(-) create mode 100644 frontend/src/components/ui/toast.tsx diff --git a/frontend/src/app/upload/[provider]/page.tsx b/frontend/src/app/upload/[provider]/page.tsx index c00391f2..000c9202 100644 --- a/frontend/src/app/upload/[provider]/page.tsx +++ b/frontend/src/app/upload/[provider]/page.tsx @@ -3,11 +3,11 @@ import { useState, useEffect } from "react" import { useParams, useRouter } from "next/navigation" import { Button } from "@/components/ui/button" -import { Badge } from "@/components/ui/badge" import { ArrowLeft, AlertCircle } from "lucide-react" import { GoogleDrivePicker } from "@/components/google-drive-picker" import { OneDrivePicker } from "@/components/onedrive-picker" import { useTask } from "@/contexts/task-context" +import { Toast } from "@/components/ui/toast" interface GoogleDriveFile { id: string @@ -43,15 +43,16 @@ export default function UploadProviderPage() { const params = useParams() const router = useRouter() const provider = params.provider as string - const { addTask } = useTask() + const { addTask, tasks } = useTask() const [connector, setConnector] = useState(null) const [isLoading, setIsLoading] = useState(true) const [error, setError] = useState(null) const [accessToken, setAccessToken] = useState(null) const [selectedFiles, setSelectedFiles] = useState([]) - const [isSyncing, setIsSyncing] = useState(false) - const [syncResult, setSyncResult] = useState(null) + const [isIngesting, setIsIngesting] = useState(false) + const [currentSyncTaskId, setCurrentSyncTaskId] = useState(null) + const [showSuccessToast, setShowSuccessToast] = useState(false) useEffect(() => { const fetchConnectorInfo = async () => { @@ -130,6 +131,26 @@ 
export default function UploadProviderPage() { } }, [provider]) + // Watch for sync task completion and redirect + useEffect(() => { + if (!currentSyncTaskId) return + + const currentTask = tasks.find(task => task.task_id === currentSyncTaskId) + + if (currentTask && currentTask.status === 'completed') { + // Task completed successfully, show toast and redirect + setIsIngesting(false) + setShowSuccessToast(true) + setTimeout(() => { + router.push('/knowledge') + }, 2000) // 2 second delay to let user see toast + } else if (currentTask && currentTask.status === 'failed') { + // Task failed, clear the tracking but don't redirect + setIsIngesting(false) + setCurrentSyncTaskId(null) + } + }, [tasks, currentSyncTaskId, router]) + const handleFileSelected = (files: GoogleDriveFile[] | OneDriveFile[]) => { setSelectedFiles(files) console.log(`Selected ${files.length} files from ${provider}:`, files) @@ -139,8 +160,7 @@ export default function UploadProviderPage() { const handleSync = async (connector: CloudConnector) => { if (!connector.connectionId || selectedFiles.length === 0) return - setIsSyncing(true) - setSyncResult(null) + setIsIngesting(true) try { const syncBody: { @@ -163,26 +183,18 @@ export default function UploadProviderPage() { const result = await response.json() if (response.status === 201) { - const taskId = result.task_id - if (taskId) { + const taskIds = result.task_ids + if (taskIds && taskIds.length > 0) { + const taskId = taskIds[0] // Use the first task ID addTask(taskId) - setSyncResult({ - processed: 0, - total: selectedFiles.length, - status: 'started' - }) + setCurrentSyncTaskId(taskId) } - } else if (response.ok) { - setSyncResult(result) } else { console.error('Sync failed:', result.error) - setSyncResult({ error: result.error || 'Sync failed' }) } } catch (error) { console.error('Sync error:', error) - setSyncResult({ error: 'Network error occurred' }) - } finally { - setIsSyncing(false) + setIsIngesting(false) } } @@ -297,26 +309,18 @@ export default function UploadProviderPage() { } return ( -
-
+
+
- -
-

Select Files from {connector.name}

-

- Choose specific files from your {connector.name} account to add to your knowledge base. -

-
+

Add Cloud Knowledge

-
+
{connector.type === "google_drive" && ( {selectedFiles.length > 0 && ( -
-
+
+
-
- - {syncResult && ( -
- {syncResult.error ? ( -
Error: {syncResult.error}
- ) : syncResult.status === 'started' ? ( -
- Sync started for {syncResult.total} files. Check the task notification for progress. -
- ) : ( -
-
Processed: {syncResult.processed || 0}
-
Added: {syncResult.added || 0}
- {syncResult.errors &&
Errors: {syncResult.errors}
} -
- )} -
- )}
)} + + {/* Success toast notification */} + setShowSuccessToast(false)} + duration={20000} + />
) } \ No newline at end of file diff --git a/frontend/src/components/google-drive-picker.tsx b/frontend/src/components/google-drive-picker.tsx index 60191261..c9dee19a 100644 --- a/frontend/src/components/google-drive-picker.tsx +++ b/frontend/src/components/google-drive-picker.tsx @@ -3,7 +3,8 @@ import { useState, useEffect } from "react" import { Button } from "@/components/ui/button" import { Badge } from "@/components/ui/badge" -import { FileText, Folder, X } from "lucide-react" +import { FileText, Folder, Plus, Trash2 } from "lucide-react" +import { Card, CardContent } from "@/components/ui/card" interface GoogleDrivePickerProps { onFileSelected: (files: GoogleDriveFile[]) => void @@ -19,6 +20,9 @@ interface GoogleDriveFile { mimeType: string webViewLink?: string iconLink?: string + size?: number + modifiedTime?: string + isFolder?: boolean } interface GoogleAPI { @@ -174,17 +178,52 @@ export function GoogleDrivePicker({ } } - const pickerCallback = (data: GooglePickerData) => { + const pickerCallback = async (data: GooglePickerData) => { if (data.action === window.google.picker.Action.PICKED) { const files: GoogleDriveFile[] = data.docs.map((doc: GooglePickerDocument) => ({ id: doc[window.google.picker.Document.ID], name: doc[window.google.picker.Document.NAME], mimeType: doc[window.google.picker.Document.MIME_TYPE], webViewLink: doc[window.google.picker.Document.URL], - iconLink: doc[window.google.picker.Document.ICON_URL] + iconLink: doc[window.google.picker.Document.ICON_URL], + size: doc['sizeBytes'] ? parseInt(doc['sizeBytes']) : undefined, + modifiedTime: doc['lastEditedUtc'], + isFolder: doc[window.google.picker.Document.MIME_TYPE] === 'application/vnd.google-apps.folder' })) - onFileSelected(files) + // If size is still missing, try to fetch it via Google Drive API + if (accessToken && files.some(f => !f.size && !f.isFolder)) { + try { + const enrichedFiles = await Promise.all(files.map(async (file) => { + if (!file.size && !file.isFolder) { + try { + const response = await fetch(`https://www.googleapis.com/drive/v3/files/${file.id}?fields=size,modifiedTime`, { + headers: { + 'Authorization': `Bearer ${accessToken}` + } + }) + if (response.ok) { + const fileDetails = await response.json() + return { + ...file, + size: fileDetails.size ? parseInt(fileDetails.size) : undefined, + modifiedTime: fileDetails.modifiedTime || file.modifiedTime + } + } + } catch (error) { + console.warn('Failed to fetch file details:', error) + } + } + return file + })) + onFileSelected(enrichedFiles) + } catch (error) { + console.warn('Failed to enrich file data:', error) + onFileSelected(files) + } + } else { + onFileSelected(files) + } } setIsPickerOpen(false) @@ -218,6 +257,14 @@ export function GoogleDrivePicker({ return typeMap[mimeType] || 'Document' } + const formatFileSize = (bytes?: number) => { + if (!bytes) return '' + const sizes = ['B', 'KB', 'MB', 'GB', 'TB'] + if (bytes === 0) return '0 B' + const i = Math.floor(Math.log(bytes) / Math.log(1024)) + return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${sizes[i]}` + } + if (!isAuthenticated) { return (
@@ -228,29 +275,38 @@ export function GoogleDrivePicker({ return (
-
-
-

File Selection

-

- Choose specific files to sync instead of syncing everything + + +

+ Select files from Google Drive to ingest.

-
- -
+ + + {selectedFiles.length > 0 && (
-

- Selected files ({selectedFiles.length}): -

-
+
+

+ Added files +

+ +
+
{selectedFiles.map((file) => (
- +
+ {formatFileSize(file.size)} + +
))}
- +
)}
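
For reference, the size-enrichment change above boils down to a plain Drive v3 files.get call.
A minimal standalone sketch, assuming a valid OAuth access token and file ID (both placeholders,
not values from this repo):

// Sketch: fetch size/modifiedTime for one Drive file via the Drive v3 files.get endpoint.
// `accessToken` and `fileId` are assumed inputs; Drive returns `size` as a string.
async function fetchDriveFileMeta(accessToken: string, fileId: string) {
  const url = `https://www.googleapis.com/drive/v3/files/${fileId}?fields=size,modifiedTime`
  const res = await fetch(url, { headers: { Authorization: `Bearer ${accessToken}` } })
  if (!res.ok) throw new Error(`Drive metadata fetch failed: ${res.status}`)
  const meta = await res.json()
  return {
    size: meta.size ? parseInt(meta.size) : undefined,
    modifiedTime: meta.modifiedTime as string | undefined,
  }
}
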
diff --git a/frontend/src/components/onedrive-picker.tsx b/frontend/src/components/onedrive-picker.tsx index b40650a7..6d4cfc78 100644 --- a/frontend/src/components/onedrive-picker.tsx +++ b/frontend/src/components/onedrive-picker.tsx @@ -3,7 +3,7 @@ import { useState, useEffect } from "react" import { Button } from "@/components/ui/button" import { Badge } from "@/components/ui/badge" -import { FileText, Folder, X } from "lucide-react" +import { FileText, Folder, Trash2, X } from "lucide-react" interface OneDrivePickerProps { onFileSelected: (files: OneDriveFile[]) => void @@ -283,7 +283,7 @@ export function OneDrivePicker({

Selected files ({selectedFiles.length}):

-
+
{selectedFiles.map((file) => (
- +
))} diff --git a/frontend/src/components/ui/toast.tsx b/frontend/src/components/ui/toast.tsx new file mode 100644 index 00000000..4d765f49 --- /dev/null +++ b/frontend/src/components/ui/toast.tsx @@ -0,0 +1,39 @@ +"use client" + +import { useState, useEffect } from 'react' +import { Check } from 'lucide-react' + +interface ToastProps { + message: string + show: boolean + onHide?: () => void + duration?: number +} + +export function Toast({ message, show, onHide, duration = 3000 }: ToastProps) { + const [isVisible, setIsVisible] = useState(show) + + useEffect(() => { + setIsVisible(show) + + if (show && duration > 0) { + const timer = setTimeout(() => { + setIsVisible(false) + onHide?.() + }, duration) + + return () => clearTimeout(timer) + } + }, [show, duration, onHide]) + + if (!isVisible) return null + + return ( +
+
+ + {message} +
+
+ ) +} \ No newline at end of file From 68652dd2987c767acb4e173509012c7eb6ab6419 Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Fri, 5 Sep 2025 18:15:51 -0300 Subject: [PATCH 23/32] =?UTF-8?q?=F0=9F=93=9D=20(agent.py):=20Update=20com?= =?UTF-8?q?ments=20and=20function=20names=20for=20clarity=20and=20consiste?= =?UTF-8?q?ncy=20=F0=9F=94=A7=20(agent.py):=20Add=20support=20for=20in-mem?= =?UTF-8?q?ory=20storage=20of=20active=20conversation=20threads=20?= =?UTF-8?q?=F0=9F=94=A7=20(agent.py):=20Implement=20storing=20conversation?= =?UTF-8?q?=20metadata=20in=20memory=20and=20persisting=20to=20disk=20?= =?UTF-8?q?=F0=9F=94=A7=20(chat=5Fservice.py):=20Refactor=20get=5Fchat=5Fh?= =?UTF-8?q?istory=20method=20to=20handle=20in-memory=20and=20persistent=20?= =?UTF-8?q?conversations=20=F0=9F=94=A7=20(chat=5Fservice.py):=20Enhance?= =?UTF-8?q?=20get=5Fchat=5Fhistory=20to=20process=20in-memory=20and=20pers?= =?UTF-8?q?istent=20conversations=20=F0=9F=94=A7=20(chat=5Fservice.py):=20?= =?UTF-8?q?Improve=20handling=20of=20in-memory=20and=20Langflow=20database?= =?UTF-8?q?=20conversations=20=F0=9F=94=A7=20(chat=5Fservice.py):=20Refact?= =?UTF-8?q?or=20get=5Fuser=5Fconversation=20method=20for=20better=20handli?= =?UTF-8?q?ng=20of=20in-memory=20and=20metadata-only=20conversations=20?= =?UTF-8?q?=F0=9F=94=A7=20(chat=5Fservice.py):=20Enhance=20get=5Fuser=5Fco?= =?UTF-8?q?nversation=20to=20handle=20in-memory=20and=20metadata-only=20co?= =?UTF-8?q?nversations=20=F0=9F=94=A7=20(chat=5Fservice.py):=20Update=20ge?= =?UTF-8?q?t=5Fuser=5Fconversation=20to=20handle=20in-memory=20and=20metad?= =?UTF-8?q?ata-only=20conversations=20=F0=9F=94=A7=20(chat=5Fservice.py):?= =?UTF-8?q?=20Refactor=20get=5Fuser=5Fconversation=20method=20to=20handle?= =?UTF-8?q?=20in-memory=20and=20metadata-only=20conversations=20?= =?UTF-8?q?=F0=9F=94=A7=20(chat=5Fservice.py):=20Improve=20get=5Fuser=5Fco?= =?UTF-8?q?nversation=20method=20to=20handle=20in-memory=20and=20metadata-?= =?UTF-8?q?only=20conversations=20=F0=9F=94=A7=20(chat=5Fservice.py):=20En?= =?UTF-8?q?hance=20get=5Fuser=5Fconversation=20method=20to=20handle=20in-m?= =?UTF-8?q?emory=20and=20metadata-only=20conversations=20=F0=9F=94=A7=20(c?= =?UTF-8?q?hat=5Fservice.py):=20Refactor=20get=5Fuser=5Fconversation=20met?= =?UTF-8?q?hod=20to=20handle=20in-memory=20and=20metadata-only=20conversat?= =?UTF-8?q?ions=20=F0=9F=94=A7=20(chat=5Fservice.py):=20Update=20get=5Fuse?= =?UTF-8?q?r=5Fconversation=20method=20to=20handle=20in-memory=20and=20met?= =?UTF-8?q?adata-only=20conversations=20=F0=9F=94=A7=20(chat=5Fservice.py)?= =?UTF-8?q?:=20Improve=20get=5Fuser=5Fconversation=20method=20to=20handle?= =?UTF-8?q?=20in-memory=20and=20metadata-only=20conversations=20?= =?UTF-8?q?=F0=9F=94=A7=20(chat=5Fservice.py):=20Enhance=20get=5Fuser=5Fco?= =?UTF-8?q?nversation=20method=20to=20handle=20in-memory=20and=20metadata-?= =?UTF-8?q?only=20conversations=20=F0=9F=94=A7=20(chat=5Fservice.py):=20Re?= =?UTF-8?q?factor=20get=5Fuser=5Fconversation=20method=20to=20handle=20in-?= =?UTF-8?q?memory=20and=20metadata-only=20conversations=20=F0=9F=94=A7=20(?= =?UTF-8?q?chat=5Fservice.py):=20Improve=20get=5Fuser=5Fconversation=20met?= =?UTF-8?q?hod=20to=20handle=20in-memory=20and=20metadata-only=20conversat?= =?UTF-8?q?ions=20=F0=9F=94=A7=20(chat=5Fservice.py):=20Enhance=20get=5Fus?= =?UTF-8?q?er=5Fconversation=20method=20to=20handle=20in-memory=20and=20me?= =?UTF-8?q?tadata-only=20conversations=20=F0=9F=94=A7=20(chat=5Fservice.py?= =?UTF-8?q?):=20Refactor=20get=5Fuser=5Fconversation=20method=20to=20handl?= 
=?UTF-8?q?e=20in-memory=20and=20metadata-only=20conversations?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/agent.py                             |  63 +++++--
 src/services/chat_service.py             | 219 ++++++++++-------------
 src/services/langflow_history_service.py |  34 +++-
 3 files changed, 168 insertions(+), 148 deletions(-)

diff --git a/src/agent.py b/src/agent.py
index 0976c0d1..6776a317 100644
--- a/src/agent.py
+++ b/src/agent.py
@@ -5,26
+5,28 @@ logger = get_logger(__name__) # Import persistent storage from services.conversation_persistence_service import conversation_persistence +# In-memory storage for active conversation threads (preserves function calls) +active_conversations = {} def get_user_conversations(user_id: str): - """Get all conversations for a user""" + """Get conversation metadata for a user from persistent storage""" return conversation_persistence.get_user_conversations(user_id) def get_conversation_thread(user_id: str, previous_response_id: str = None): - """Get or create a specific conversation thread""" - conversations = get_user_conversations(user_id) - - if previous_response_id and previous_response_id in conversations: - # Update last activity and return existing conversation - conversations[previous_response_id]["last_activity"] = __import__( - "datetime" - ).datetime.now() - return conversations[previous_response_id] - - # Create new conversation thread + """Get or create a specific conversation thread with function call preservation""" from datetime import datetime + # Create user namespace if it doesn't exist + if user_id not in active_conversations: + active_conversations[user_id] = {} + + # If we have a previous_response_id, try to get the existing conversation + if previous_response_id and previous_response_id in active_conversations[user_id]: + logger.debug(f"Retrieved existing conversation for user {user_id}, response_id {previous_response_id}") + return active_conversations[user_id][previous_response_id] + + # Create new conversation thread new_conversation = { "messages": [ { @@ -41,18 +43,49 @@ def get_conversation_thread(user_id: str, previous_response_id: str = None): def store_conversation_thread(user_id: str, response_id: str, conversation_state: dict): - """Store a conversation thread with its response_id""" - conversation_persistence.store_conversation_thread(user_id, response_id, conversation_state) + """Store conversation both in memory (with function calls) and persist metadata to disk""" + # 1. Store full conversation in memory for function call preservation + if user_id not in active_conversations: + active_conversations[user_id] = {} + active_conversations[user_id][response_id] = conversation_state + + # 2. Store only essential metadata to disk (simplified JSON) + messages = conversation_state.get("messages", []) + first_user_msg = next((msg for msg in messages if msg.get("role") == "user"), None) + title = "New Chat" + if first_user_msg: + content = first_user_msg.get("content", "") + title = content[:50] + "..." 
if len(content) > 50 else content + + metadata_only = { + "response_id": response_id, + "title": title, + "endpoint": "langflow", + "created_at": conversation_state.get("created_at"), + "last_activity": conversation_state.get("last_activity"), + "previous_response_id": conversation_state.get("previous_response_id"), + "total_messages": len([msg for msg in messages if msg.get("role") in ["user", "assistant"]]), + # Don't store actual messages - Langflow has them + } + + conversation_persistence.store_conversation_thread(user_id, response_id, metadata_only) # Legacy function for backward compatibility def get_user_conversation(user_id: str): """Get the most recent conversation for a user (for backward compatibility)""" + # Check in-memory conversations first (with function calls) + if user_id in active_conversations and active_conversations[user_id]: + latest_response_id = max(active_conversations[user_id].keys(), + key=lambda k: active_conversations[user_id][k]["last_activity"]) + return active_conversations[user_id][latest_response_id] + + # Fallback to metadata-only conversations conversations = get_user_conversations(user_id) if not conversations: return get_conversation_thread(user_id) - # Return the most recently active conversation + # Return the most recently active conversation metadata latest_conversation = max(conversations.values(), key=lambda c: c["last_activity"]) return latest_conversation diff --git a/src/services/chat_service.py b/src/services/chat_service.py index 556decc8..4e88de19 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -198,21 +198,29 @@ class ChatService: async def get_chat_history(self, user_id: str): """Get chat conversation history for a user""" - from agent import get_user_conversations + from agent import get_user_conversations, active_conversations if not user_id: return {"error": "User ID is required", "conversations": []} + # Get metadata from persistent storage conversations_dict = get_user_conversations(user_id) + + # Get in-memory conversations (with function calls) + in_memory_conversations = active_conversations.get(user_id, {}) + logger.debug( "Getting chat history for user", user_id=user_id, - conversation_count=len(conversations_dict), + persistent_count=len(conversations_dict), + in_memory_count=len(in_memory_conversations), ) # Convert conversations dict to list format with metadata conversations = [] - for response_id, conversation_state in conversations_dict.items(): + + # First, process in-memory conversations (they have function calls) + for response_id, conversation_state in in_memory_conversations.items(): # Filter out system messages messages = [] for msg in conversation_state.get("messages", []): @@ -266,11 +274,28 @@ class ChatService: "previous_response_id" ), "total_messages": len(messages), + "source": "in_memory" } ) + + # Then, add any persistent metadata that doesn't have in-memory data + for response_id, metadata in conversations_dict.items(): + if response_id not in in_memory_conversations: + # This is metadata-only conversation (no function calls) + conversations.append({ + "response_id": response_id, + "title": metadata.get("title", "New Chat"), + "endpoint": "chat", + "messages": [], # No messages in metadata-only + "created_at": metadata.get("created_at"), + "last_activity": metadata.get("last_activity"), + "previous_response_id": metadata.get("previous_response_id"), + "total_messages": metadata.get("total_messages", 0), + "source": "metadata_only" + }) # Sort by last activity (most recent 
first) - conversations.sort(key=lambda c: c["last_activity"], reverse=True) + conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True) return { "user_id": user_id, @@ -290,28 +315,36 @@ class ChatService: all_conversations = [] try: - # 1. Get in-memory OpenRAG conversations (current session) + # 1. Get local conversation metadata (no actual messages stored here) conversations_dict = get_user_conversations(user_id) + local_metadata = {} - for response_id, conversation_state in conversations_dict.items(): - # Filter out system messages - messages = [] - for msg in conversation_state.get("messages", []): - if msg.get("role") in ["user", "assistant"]: - # Handle timestamp - could be datetime object or string - timestamp = msg.get("timestamp") - if timestamp: - if hasattr(timestamp, 'isoformat'): - timestamp = timestamp.isoformat() - # else it's already a string - + for response_id, conversation_metadata in conversations_dict.items(): + # Store metadata for later use with Langflow data + local_metadata[response_id] = conversation_metadata + + # 2. Get actual conversations from Langflow database (source of truth for messages) + print(f"[DEBUG] Attempting to fetch Langflow history for user: {user_id}") + langflow_history = await langflow_history_service.get_user_conversation_history(user_id, flow_id=FLOW_ID) + + if langflow_history.get("conversations"): + for conversation in langflow_history["conversations"]: + session_id = conversation["session_id"] + + # Only process sessions that belong to this user (exist in local metadata) + if session_id not in local_metadata: + continue + + # Use Langflow messages (with function calls) as source of truth + messages = [] + for msg in conversation.get("messages", []): message_data = { "role": msg["role"], "content": msg["content"], - "timestamp": timestamp, + "timestamp": msg.get("timestamp"), + "langflow_message_id": msg.get("langflow_message_id"), + "source": "langflow" } - if msg.get("response_id"): - message_data["response_id"] = msg["response_id"] # Include function call data if present if msg.get("chunks"): @@ -320,82 +353,51 @@ class ChatService: message_data["response_data"] = msg["response_data"] messages.append(message_data) - - if messages: # Only include conversations with actual messages - # Generate title from first user message - first_user_msg = next( - (msg for msg in messages if msg["role"] == "user"), None - ) - title = ( - first_user_msg["content"][:50] + "..." - if first_user_msg and len(first_user_msg["content"]) > 50 - else first_user_msg["content"] - if first_user_msg - else "New chat" - ) - - # Handle conversation timestamps - could be datetime objects or strings - created_at = conversation_state.get("created_at") - if created_at and hasattr(created_at, 'isoformat'): - created_at = created_at.isoformat() - - last_activity = conversation_state.get("last_activity") - if last_activity and hasattr(last_activity, 'isoformat'): - last_activity = last_activity.isoformat() - - all_conversations.append({ - "response_id": response_id, - "title": title, - "endpoint": "langflow", - "messages": messages, - "created_at": created_at, - "last_activity": last_activity, - "previous_response_id": conversation_state.get("previous_response_id"), - "total_messages": len(messages), - "source": "openrag_memory" - }) - - # 2. 
Get historical conversations from Langflow database - # (works with both Google-bound users and direct Langflow users) - print(f"[DEBUG] Attempting to fetch Langflow history for user: {user_id}") - langflow_history = await langflow_history_service.get_user_conversation_history(user_id, flow_id=FLOW_ID) - - if langflow_history.get("conversations"): - for conversation in langflow_history["conversations"]: - # Convert Langflow format to OpenRAG format - messages = [] - for msg in conversation.get("messages", []): - messages.append({ - "role": msg["role"], - "content": msg["content"], - "timestamp": msg.get("timestamp"), - "langflow_message_id": msg.get("langflow_message_id"), - "source": "langflow" - }) if messages: - first_user_msg = next((msg for msg in messages if msg["role"] == "user"), None) - title = ( - first_user_msg["content"][:50] + "..." - if first_user_msg and len(first_user_msg["content"]) > 50 - else first_user_msg["content"] - if first_user_msg - else "Langflow chat" - ) + # Use local metadata if available, otherwise generate from Langflow data + metadata = local_metadata.get(session_id, {}) + + if not metadata.get("title"): + first_user_msg = next((msg for msg in messages if msg["role"] == "user"), None) + title = ( + first_user_msg["content"][:50] + "..." + if first_user_msg and len(first_user_msg["content"]) > 50 + else first_user_msg["content"] + if first_user_msg + else "Langflow chat" + ) + else: + title = metadata["title"] all_conversations.append({ - "response_id": conversation["session_id"], + "response_id": session_id, "title": title, "endpoint": "langflow", - "messages": messages, - "created_at": conversation.get("created_at"), - "last_activity": conversation.get("last_activity"), + "messages": messages, # Function calls preserved from Langflow + "created_at": metadata.get("created_at") or conversation.get("created_at"), + "last_activity": metadata.get("last_activity") or conversation.get("last_activity"), "total_messages": len(messages), - "source": "langflow_database", - "langflow_session_id": conversation["session_id"], + "source": "langflow_enhanced", + "langflow_session_id": session_id, "langflow_flow_id": conversation.get("flow_id") }) + + # 3. 
Add any local metadata that doesn't have Langflow data yet (recent conversations) + for response_id, metadata in local_metadata.items(): + if not any(c["response_id"] == response_id for c in all_conversations): + all_conversations.append({ + "response_id": response_id, + "title": metadata.get("title", "New Chat"), + "endpoint": "langflow", + "messages": [], # Will be filled when Langflow sync catches up + "created_at": metadata.get("created_at"), + "last_activity": metadata.get("last_activity"), + "total_messages": metadata.get("total_messages", 0), + "source": "metadata_only" + }) + if langflow_history.get("conversations"): print(f"[DEBUG] Added {len(langflow_history['conversations'])} historical conversations from Langflow") elif langflow_history.get("error"): print(f"[DEBUG] Could not fetch Langflow history for user {user_id}: {langflow_history['error']}") @@ -406,51 +408,14 @@ class ChatService: print(f"[ERROR] Failed to fetch Langflow history: {e}") # Continue with just in-memory conversations - # Deduplicate conversations by response_id (in-memory takes priority over database) - deduplicated_conversations = {} - - for conversation in all_conversations: - response_id = conversation.get("response_id") - if response_id: - if response_id not in deduplicated_conversations: - # First occurrence - add it - deduplicated_conversations[response_id] = conversation - else: - # Duplicate found - prioritize in-memory (more recent) over database - existing = deduplicated_conversations[response_id] - current_source = conversation.get("source") - existing_source = existing.get("source") - - if current_source == "openrag_memory" and existing_source == "langflow_database": - # Replace database version with in-memory version - deduplicated_conversations[response_id] = conversation - print(f"[DEBUG] Replaced database conversation {response_id} with in-memory version") - # Otherwise keep existing (in-memory has priority, or first database entry) - else: - # No response_id - add with unique key based on content and timestamp - unique_key = f"no_id_{hash(conversation.get('title', ''))}{conversation.get('created_at', '')}" - if unique_key not in deduplicated_conversations: - deduplicated_conversations[unique_key] = conversation - - final_conversations = list(deduplicated_conversations.values()) - # Sort by last activity (most recent first) - final_conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True) + all_conversations.sort(key=lambda c: c.get("last_activity", ""), reverse=True) - # Calculate source statistics after deduplication - sources = { - "memory": len([c for c in final_conversations if c.get("source") == "openrag_memory"]), - "langflow_db": len([c for c in final_conversations if c.get("source") == "langflow_database"]), - "duplicates_removed": len(all_conversations) - len(final_conversations) - } - - if sources["duplicates_removed"] > 0: - print(f"[DEBUG] Removed {sources['duplicates_removed']} duplicate conversations") + print(f"[DEBUG] Returning {len(all_conversations)} conversations ({len(local_metadata)} from local metadata)") return { "user_id": user_id, "endpoint": "langflow", - "conversations": final_conversations, - "total_conversations": len(final_conversations), - "sources": sources + "conversations": all_conversations, + "total_conversations": len(all_conversations), } diff --git a/src/services/langflow_history_service.py b/src/services/langflow_history_service.py index 283ddf85..0b04a2e9 100644 --- a/src/services/langflow_history_service.py +++ 
b/src/services/langflow_history_service.py @@ -6,7 +6,7 @@ Simplified service that retrieves message history from Langflow using a single t import httpx from typing import List, Dict, Optional, Any -from config.settings import LANGFLOW_URL, LANGFLOW_KEY, LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD +from config.settings import LANGFLOW_URL, LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD class LangflowHistoryService: @@ -21,11 +21,6 @@ class LangflowHistoryService: if self.auth_token: return self.auth_token - # Try using LANGFLOW_KEY first if available - if LANGFLOW_KEY: - self.auth_token = LANGFLOW_KEY - return self.auth_token - if not all([LANGFLOW_SUPERUSER, LANGFLOW_SUPERUSER_PASSWORD]): print("Missing Langflow credentials") return None @@ -146,6 +141,33 @@ class LangflowHistoryService: "error": msg.get("error", False), "edit": msg.get("edit", False) } + + # Extract function calls from content_blocks if present + content_blocks = msg.get("content_blocks", []) + if content_blocks: + chunks = [] + for block in content_blocks: + if block.get("title") == "Agent Steps" and block.get("contents"): + for content in block["contents"]: + if content.get("type") == "tool_use": + # Convert Langflow tool_use format to OpenRAG chunks format + chunk = { + "type": "function", + "function": { + "name": content.get("name", ""), + "arguments": content.get("tool_input", {}), + "response": content.get("output", {}) + }, + "function_call_result": content.get("output", {}), + "duration": content.get("duration"), + "error": content.get("error") + } + chunks.append(chunk) + + if chunks: + converted_msg["chunks"] = chunks + converted_msg["response_data"] = {"tool_calls": chunks} + converted_messages.append(converted_msg) except Exception as e: From 84a05ef3b5e8ab478a811e470e8b282802833b4e Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Fri, 5 Sep 2025 15:30:39 -0700 Subject: [PATCH 24/32] revert docker compose --- docker-compose.yml | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 252088ac..7eb8a055 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,9 +5,11 @@ services: #context: . 
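    # Note: this revert drops the ":-" fallback defaults (e.g. admin123), so an unset
    # variable now substitutes as an empty string. Standard Compose substitution forms,
    # shown here only as a hedged reference sketch, not as config from this repo:
    #   ${OPENSEARCH_PASSWORD}                -> empty string if unset
    #   ${OPENSEARCH_PASSWORD:-admin123}      -> fallback default if unset
    #   ${OPENSEARCH_PASSWORD:?must be set}   -> compose fails fast if unset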
#dockerfile: Dockerfile container_name: os + depends_on: + - openrag-backend environment: - discovery.type=single-node - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD:-admin123} + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD} # Run security setup in background after OpenSearch starts command: > bash -c " @@ -32,7 +34,7 @@ services: environment: OPENSEARCH_HOSTS: '["https://opensearch:9200"]' OPENSEARCH_USERNAME: "admin" - OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin123} + OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD} ports: - "5601:5601" @@ -43,18 +45,17 @@ services: #dockerfile: Dockerfile.backend container_name: openrag-backend depends_on: - - opensearch - langflow environment: - OPENSEARCH_HOST=opensearch - LANGFLOW_URL=http://langflow:7860 - LANGFLOW_PUBLIC_URL=${LANGFLOW_PUBLIC_URL} - - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER:-admin} - - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD:-admin123} + - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} + - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD} - FLOW_ID=${FLOW_ID} - OPENSEARCH_PORT=9200 - OPENSEARCH_USERNAME=admin - - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD:-admin123} + - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} - OPENAI_API_KEY=${OPENAI_API_KEY} - NVIDIA_DRIVER_CAPABILITIES=compute,utility - NVIDIA_VISIBLE_DEVICES=all @@ -68,8 +69,6 @@ services: volumes: - ./documents:/app/documents:Z - ./keys:/app/keys:Z - ports: - - "8000:8000" gpus: all openrag-frontend: @@ -88,7 +87,7 @@ services: langflow: volumes: - ./flows:/app/flows:Z - image: langflowai/langflow:latest + image: phact/langflow:responses container_name: langflow ports: - "7860:7860" @@ -100,8 +99,8 @@ services: - OPENRAG-QUERY-FILTER="{}" - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER - LANGFLOW_LOG_LEVEL=DEBUG - - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN:-true} - - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER:-admin} - - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD:-admin123} - - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE:-true} - - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI:-true} + - LANGFLOW_AUTO_LOGIN=${LANGFLOW_AUTO_LOGIN} + - LANGFLOW_SUPERUSER=${LANGFLOW_SUPERUSER} + - LANGFLOW_SUPERUSER_PASSWORD=${LANGFLOW_SUPERUSER_PASSWORD} + - LANGFLOW_NEW_USER_IS_ACTIVE=${LANGFLOW_NEW_USER_IS_ACTIVE} + - LANGFLOW_ENABLE_SUPERUSER_CLI=${LANGFLOW_ENABLE_SUPERUSER_CLI} \ No newline at end of file From 0be4dd4893d8fc751a48cd471d91157aaa28002c Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 5 Sep 2025 23:55:58 -0400 Subject: [PATCH 25/32] fix import --- src/services/task_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/task_service.py b/src/services/task_service.py index bd2a73d7..79fcf8bf 100644 --- a/src/services/task_service.py +++ b/src/services/task_service.py @@ -5,7 +5,7 @@ import random from typing import Dict from models.tasks import TaskStatus, UploadTask, FileTask -from src.utils.gpu_detection import get_worker_count +from utils.gpu_detection import get_worker_count from utils.logging_config import get_logger logger = get_logger(__name__) From 49df531872c2aeeae8043998534e6c19a1a546d8 Mon Sep 17 00:00:00 2001 From: phact Date: Fri, 5 Sep 2025 23:56:10 -0400 Subject: [PATCH 26/32] uv tui script --- pyproject.toml | 8 ++++++-- src/main.py | 7 ------- uv.lock | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 20d8f5c4..3ed39646 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -29,11 +29,15 @@ dependencies = [ "structlog>=25.4.0", ] +[project.scripts] +openrag = "tui.main:run_tui" + +[tool.uv] +package = true + [tool.uv.sources] -#agentd = { path = "/home/tato/Desktop/agentd" } torch = [ { index = "pytorch-cu128", marker = "sys_platform == 'linux' and platform_machine == 'x86_64'" }, - # macOS & other platforms use PyPI (no index entry needed) ] torchvision = [ { index = "pytorch-cu128", marker = "sys_platform == 'linux' and platform_machine == 'x86_64'" }, diff --git a/src/main.py b/src/main.py index 480d233a..382e5646 100644 --- a/src/main.py +++ b/src/main.py @@ -1,12 +1,5 @@ import sys -# Check for TUI flag FIRST, before any heavy imports -if __name__ == "__main__" and len(sys.argv) > 1 and sys.argv[1] == "--tui": - from tui.main import run_tui - - run_tui() - sys.exit(0) - # Configure structured logging early from utils.logging_config import configure_from_env, get_logger diff --git a/uv.lock b/uv.lock index a08b7457..87734b48 100644 --- a/uv.lock +++ b/uv.lock @@ -1406,7 +1406,7 @@ wheels = [ [[package]] name = "openrag" version = "0.1.0" -source = { virtual = "." } +source = { editable = "." } dependencies = [ { name = "agentd" }, { name = "aiofiles" }, From 174823f5d4604c11495931090e89ce7ebbf8549f Mon Sep 17 00:00:00 2001 From: phact Date: Sat, 6 Sep 2025 00:45:10 -0400 Subject: [PATCH 27/32] spawn fix --- src/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index df59263e..8f586e7a 100644 --- a/src/main.py +++ b/src/main.py @@ -14,6 +14,10 @@ import subprocess from functools import partial from starlette.applications import Starlette from starlette.routing import Route + +# Set multiprocessing start method to 'spawn' for CUDA compatibility +multiprocessing.set_start_method("spawn", force=True) + from utils.process_pool import process_pool import torch @@ -50,8 +54,6 @@ from api import ( settings, ) -# Set multiprocessing start method to 'spawn' for CUDA compatibility -multiprocessing.set_start_method("spawn", force=True) logger.info( "CUDA device information", From caa6a701d6d32231c4f59eae06e1a55786eb7ca5 Mon Sep 17 00:00:00 2001 From: phact Date: Sat, 6 Sep 2025 00:46:35 -0400 Subject: [PATCH 28/32] remove active connection duplicate test filter --- src/api/connectors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/api/connectors.py b/src/api/connectors.py index 813b6a5e..b7b603f0 100644 --- a/src/api/connectors.py +++ b/src/api/connectors.py @@ -39,7 +39,6 @@ async def connector_sync(request: Request, connector_service, session_manager): ) active_connections = [conn for conn in connections if conn.is_active] - active_connections = active_connections[:1] # TODO: Temporary workaround for duplicate connections if not active_connections: return JSONResponse( {"error": f"No active {connector_type} connections found"}, From 1c1bd8be599815572e0c332dd9aa314d95088c58 Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Sun, 7 Sep 2025 19:20:33 -0400 Subject: [PATCH 29/32] Create Dockerfile.langflow --- Dockerfile.langflow | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 Dockerfile.langflow diff --git a/Dockerfile.langflow b/Dockerfile.langflow new file mode 100644 index 00000000..99e6e155 --- /dev/null +++ b/Dockerfile.langflow @@ -0,0 +1,49 @@ +FROM python:3.12-slim + +# Set environment variables +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 +ENV RUSTFLAGS="--cfg reqwest_unstable" + +# Accept build arguments for 
git repository and branch +ARG GIT_REPO=https://github.com/langflow-ai/langflow.git +ARG GIT_BRANCH=load_flows_autologin_false + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + ca-certificates \ + gnupg \ + npm \ + rustc cargo pkg-config libssl-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install uv for faster Python package management +RUN pip install uv + +# Clone the repository and checkout the specified branch +RUN git clone --depth 1 --branch ${GIT_BRANCH} ${GIT_REPO} /app + +# Install backend dependencies +RUN uv sync --frozen --no-install-project --no-editable --extra postgresql + +# Build frontend +WORKDIR /app/src/frontend +RUN npm ci && \ + npm run build && \ + mkdir -p /app/src/backend/base/langflow/frontend && \ + cp -r build/* /app/src/backend/base/langflow/frontend/ + +# Return to app directory and install the project +WORKDIR /app +RUN uv sync --frozen --no-dev --no-editable --extra postgresql + +# Expose ports +EXPOSE 7860 + +# Start the backend server +CMD ["uv", "run", "langflow", "run", "--host", "0.0.0.0", "--port", "7860"] From 8dc77298be45ee24ecabc93947fd2dfb5d20258a Mon Sep 17 00:00:00 2001 From: phact Date: Mon, 8 Sep 2025 10:30:18 -0400 Subject: [PATCH 30/32] logging --- src/services/langflow_file_service.py | 62 +++++++++++++++++---------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/src/services/langflow_file_service.py b/src/services/langflow_file_service.py index 694e71e0..494048ed 100644 --- a/src/services/langflow_file_service.py +++ b/src/services/langflow_file_service.py @@ -1,9 +1,9 @@ -import logging from typing import Any, Dict, List, Optional from config.settings import LANGFLOW_INGEST_FLOW_ID, clients +from utils.logging_config import get_logger -logger = logging.getLogger(__name__) +logger = get_logger(__name__) class LangflowFileService: @@ -24,14 +24,16 @@ class LangflowFileService: headers={"Content-Type": None}, ) logger.debug( - "[LF] Upload response: %s %s", resp.status_code, resp.reason_phrase + "[LF] Upload response", + status_code=resp.status_code, + reason=resp.reason_phrase, ) if resp.status_code >= 400: logger.error( - "[LF] Upload failed: %s %s | body=%s", - resp.status_code, - resp.reason_phrase, - resp.text[:500], + "[LF] Upload failed", + status_code=resp.status_code, + reason=resp.reason_phrase, + body=resp.text[:500], ) resp.raise_for_status() return resp.json() @@ -39,17 +41,19 @@ class LangflowFileService: async def delete_user_file(self, file_id: str) -> None: """Delete a file by id using v2: DELETE /api/v2/files/{id}.""" # NOTE: use v2 root, not /api/v1 - logger.debug("[LF] Delete (v2) -> /api/v2/files/%s", file_id) + logger.debug("[LF] Delete (v2) -> /api/v2/files/{id}", file_id=file_id) resp = await clients.langflow_request("DELETE", f"/api/v2/files/{file_id}") logger.debug( - "[LF] Delete response: %s %s", resp.status_code, resp.reason_phrase + "[LF] Delete response", + status_code=resp.status_code, + reason=resp.reason_phrase, ) if resp.status_code >= 400: logger.error( - "[LF] Delete failed: %s %s | body=%s", - resp.status_code, - resp.reason_phrase, - resp.text[:500], + "[LF] Delete failed", + status_code=resp.status_code, + reason=resp.reason_phrase, + body=resp.text[:500], ) resp.raise_for_status() @@ -84,9 +88,11 @@ class LangflowFileService: if jwt_token: # Using the global variable pattern that Langflow expects for OpenSearch components tweaks["OpenSearchHybrid-Ve6bS"] = {"jwt_token": jwt_token} - logger.error("[LF] 
Adding JWT token to tweaks for OpenSearch components") + logger.debug( + "[LF] Added JWT token to tweaks for OpenSearch components" + ) else: - logger.error("[LF] No JWT token provided") + logger.warning("[LF] No JWT token provided") if tweaks: payload["tweaks"] = tweaks if session_id: @@ -101,19 +107,29 @@ class LangflowFileService: bool(jwt_token), ) - # Log the full payload for debugging - logger.debug("[LF] Request payload: %s", payload) + # Avoid logging full payload to prevent leaking sensitive data (e.g., JWT) resp = await clients.langflow_request( "POST", f"/api/v1/run/{self.flow_id_ingest}", json=payload ) - logger.debug("[LF] Run response: %s %s", resp.status_code, resp.reason_phrase) + logger.debug( + "[LF] Run response", status_code=resp.status_code, reason=resp.reason_phrase + ) if resp.status_code >= 400: logger.error( - "[LF] Run failed: %s %s | body=%s", - resp.status_code, - resp.reason_phrase, - resp.text[:1000], + "[LF] Run failed", + status_code=resp.status_code, + reason=resp.reason_phrase, + body=resp.text[:1000], ) resp.raise_for_status() - return resp.json() + try: + resp_json = resp.json() + except Exception as e: + logger.error( + "[LF] Failed to parse run response as JSON", + body=resp.text[:1000], + error=str(e), + ) + raise + return resp_json From cb3ceceafed5115fccd28249dbd0f10c4e1f22d9 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Mon, 8 Sep 2025 12:15:09 -0300 Subject: [PATCH 31/32] Fix import statement for logging configuration in warm_up_docling.py --- warm_up_docling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/warm_up_docling.py b/warm_up_docling.py index 272768ce..c605bef5 100644 --- a/warm_up_docling.py +++ b/warm_up_docling.py @@ -1,6 +1,7 @@ -from docling.document_converter import DocumentConverter import logging +from docling.document_converter import DocumentConverter + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) From d5166f4314a828c4ac47368fbbfbe9e9ef569829 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Mon, 8 Sep 2025 12:59:28 -0300 Subject: [PATCH 32/32] fix langflow file service change --- src/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main.py b/src/main.py index dc012c6e..9810ca24 100644 --- a/src/main.py +++ b/src/main.py @@ -332,6 +332,8 @@ async def initialize_services(): else: logger.info("[CONNECTORS] Skipping connection loading in no-auth mode") + langflow_file_service = LangflowFileService() + return { "document_service": document_service, "search_service": search_service,
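
A minimal sketch of the logging convention PATCH 30/32 moves
src/services/langflow_file_service.py toward: structured key/value events
instead of %-interpolation. It assumes the utils.logging_config.get_logger
wrapper imported there returns a structlog-style logger; plain structlog (a
declared dependency in pyproject.toml) stands in for the wrapper here:

    import structlog

    logger = structlog.get_logger(__name__)

    status_code, reason = 502, "Bad Gateway"
    # Old style baked values into the message: "[LF] Upload failed: %s %s".
    # The structured form keeps the event text constant and attaches the
    # variables as fields, so downstream processors can filter on status_code
    # instead of parsing message strings.
    logger.error("[LF] Upload failed", status_code=status_code, reason=reason)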