diff --git a/.dockerignore b/.dockerignore index 4c49bd78..8e0ed179 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,2 @@ .env +.drive.json diff --git a/.gitignore b/.gitignore index 32f3d866..827b1ed4 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ wheels/ .idea/ 1001*.pdf +*.json diff --git a/frontend/components/navigation.tsx b/frontend/components/navigation.tsx index b8219b8d..bc18f511 100644 --- a/frontend/components/navigation.tsx +++ b/frontend/components/navigation.tsx @@ -2,7 +2,7 @@ import Link from "next/link" import { usePathname } from "next/navigation" -import { Search, Settings, MessageCircle } from "lucide-react" +import { Search, Settings, MessageCircle, PlugZap } from "lucide-react" import { cn } from "@/lib/utils" export function Navigation() { @@ -27,6 +27,12 @@ export function Navigation() { href: "/chat", active: pathname === "/chat", }, + { + label: "Connectors", + icon: PlugZap, + href: "/connectors", + active: pathname.startsWith("/connectors"), + }, ] return ( diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 84d2f8e6..c53a7cc4 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -8,6 +8,8 @@ "name": "frontend", "version": "0.1.0", "dependencies": { + "@radix-ui/react-avatar": "^1.1.10", + "@radix-ui/react-dropdown-menu": "^2.1.15", "@radix-ui/react-label": "^2.1.7", "@radix-ui/react-navigation-menu": "^1.2.13", "@radix-ui/react-select": "^2.2.5", @@ -1056,6 +1058,33 @@ } } }, + "node_modules/@radix-ui/react-avatar": { + "version": "1.1.10", + "resolved": "https://registry.npmjs.org/@radix-ui/react-avatar/-/react-avatar-1.1.10.tgz", + "integrity": "sha512-V8piFfWapM5OmNCXTzVQY+E1rDa53zY+MQ4Y7356v4fFz6vqCyUtIz2rUD44ZEdwg78/jKmMJHj07+C/Z/rcog==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-is-hydrated": "0.1.0", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-collection": { "version": "1.1.7", "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz", @@ -1154,6 +1183,35 @@ } } }, + "node_modules/@radix-ui/react-dropdown-menu": { + "version": "2.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.15.tgz", + "integrity": "sha512-mIBnOjgwo9AH3FyKaSWoSu/dYj6VdhJ7frEPiGTeXCdUFHjl9h3mFh2wwhEtINOmYXWhdpf1rY2minFsmaNgVQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.2", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-menu": "2.1.15", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-focus-guards": { 
"version": "1.1.2", "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.2.tgz", @@ -1235,6 +1293,46 @@ } } }, + "node_modules/@radix-ui/react-menu": { + "version": "2.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.15.tgz", + "integrity": "sha512-tVlmA3Vb9n8SZSd+YSbuFR66l87Wiy4du+YE+0hzKQEANA+7cWKH1WgqcEX4pXqxUFQKrWQGHdvEfw00TjFiew==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.2", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.10", + "@radix-ui/react-focus-guards": "1.1.2", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.7", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.4", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-roving-focus": "1.1.10", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-navigation-menu": { "version": "1.2.13", "resolved": "https://registry.npmjs.org/@radix-ui/react-navigation-menu/-/react-navigation-menu-1.2.13.tgz", @@ -1374,6 +1472,37 @@ } } }, + "node_modules/@radix-ui/react-roving-focus": { + "version": "1.1.10", + "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.10.tgz", + "integrity": "sha512-dT9aOXUen9JSsxnMPv/0VqySQf5eDQ6LCk5Sw28kamz8wSOW2bJdlX2Bg5VUIIcV+6XlHpWTIuTPCf/UNIyq8Q==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.2", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-select": { "version": "2.2.5", "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.5.tgz", @@ -1534,6 +1663,24 @@ } } }, + "node_modules/@radix-ui/react-use-is-hydrated": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-is-hydrated/-/react-use-is-hydrated-0.1.0.tgz", + "integrity": "sha512-U+UORVEq+cTnRIaostJv9AGdV3G6Y+zbVd+12e18jQ5A3c0xL03IhnHuiU4UV69wolOQp5GfR58NW/EgdQhwOA==", + "license": "MIT", + "dependencies": { + "use-sync-external-store": "^1.5.0" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-use-layout-effect": { 
"version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", @@ -7096,6 +7243,15 @@ } } }, + "node_modules/use-sync-external-store": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.5.0.tgz", + "integrity": "sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==", + "license": "MIT", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", diff --git a/frontend/package.json b/frontend/package.json index 4d0463b9..69201d67 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -9,6 +9,8 @@ "lint": "next lint" }, "dependencies": { + "@radix-ui/react-avatar": "^1.1.10", + "@radix-ui/react-dropdown-menu": "^2.1.15", "@radix-ui/react-label": "^2.1.7", "@radix-ui/react-navigation-menu": "^1.2.13", "@radix-ui/react-select": "^2.2.5", diff --git a/frontend/src/app/admin/page.tsx b/frontend/src/app/admin/page.tsx index 6588f230..b4255159 100644 --- a/frontend/src/app/admin/page.tsx +++ b/frontend/src/app/admin/page.tsx @@ -6,8 +6,9 @@ import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/com import { Input } from "@/components/ui/input" import { Label } from "@/components/ui/label" import { Upload, FolderOpen, Loader2 } from "lucide-react" +import { ProtectedRoute } from "@/components/protected-route" -export default function AdminPage() { +function AdminPage() { const [fileUploadLoading, setFileUploadLoading] = useState(false) const [pathUploadLoading, setPathUploadLoading] = useState(false) const [selectedFile, setSelectedFile] = useState(null) @@ -259,4 +260,12 @@ export default function AdminPage() { ) +} + +export default function ProtectedAdminPage() { + return ( + + + + ) } \ No newline at end of file diff --git a/frontend/src/app/auth/callback/page.tsx b/frontend/src/app/auth/callback/page.tsx new file mode 100644 index 00000000..22d3c25f --- /dev/null +++ b/frontend/src/app/auth/callback/page.tsx @@ -0,0 +1,169 @@ +"use client" + +import { useEffect, useState, useRef } from "react" +import { useRouter, useSearchParams } from "next/navigation" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Button } from "@/components/ui/button" +import { Loader2, CheckCircle, XCircle, ArrowLeft } from "lucide-react" +import { useAuth } from "@/contexts/auth-context" + +export default function AuthCallbackPage() { + const router = useRouter() + const searchParams = useSearchParams() + const { refreshAuth } = useAuth() + const [status, setStatus] = useState<"processing" | "success" | "error">("processing") + const [error, setError] = useState(null) + const hasProcessed = useRef(false) + + useEffect(() => { + // Prevent double execution in React Strict Mode + if (hasProcessed.current) return + hasProcessed.current = true + + const handleCallback = async () => { + try { + // Get parameters from URL + const code = searchParams.get('code') + const state = searchParams.get('state') + const errorParam = searchParams.get('error') + + // Get stored auth info + const connectorId = localStorage.getItem('connecting_connector_id') + const storedConnectorType = localStorage.getItem('connecting_connector_type') + const authPurpose = localStorage.getItem('auth_purpose') + + // Debug 
logging + console.log('Auth Callback Debug:', { + urlParams: { code: !!code, state: !!state, error: errorParam }, + localStorage: { connectorId, storedConnectorType, authPurpose }, + fullUrl: window.location.href + }) + + // Use state parameter as connection_id if localStorage is missing + const finalConnectorId = connectorId || state + + if (errorParam) { + throw new Error(`OAuth error: ${errorParam}`) + } + + if (!code || !state || !finalConnectorId) { + console.error('Missing auth callback parameters:', { + code: !!code, + state: !!state, + finalConnectorId: !!finalConnectorId + }) + throw new Error('Missing required parameters for OAuth callback') + } + + // Send callback data to backend + const response = await fetch('/api/auth/callback', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + connection_id: finalConnectorId, + authorization_code: code, + state: state + }), + }) + + const result = await response.json() + + if (response.ok && result.purpose === 'app_auth') { + setStatus("success") + + // Refresh auth context to pick up the new user + await refreshAuth() + + // Clean up localStorage + localStorage.removeItem('connecting_connector_id') + localStorage.removeItem('connecting_connector_type') + localStorage.removeItem('auth_purpose') + + // Get redirect URL from login page + const redirectTo = searchParams.get('redirect') || '/' + + // Redirect to the original page or home + setTimeout(() => { + router.push(redirectTo) + }, 2000) + } else { + throw new Error(result.error || 'Authentication failed') + } + + } catch (err) { + console.error('Auth callback error:', err) + setError(err instanceof Error ? err.message : 'Unknown error occurred') + setStatus("error") + + // Clean up localStorage on error too + localStorage.removeItem('connecting_connector_id') + localStorage.removeItem('connecting_connector_type') + localStorage.removeItem('auth_purpose') + } + } + + handleCallback() + }, [searchParams, router, refreshAuth]) + + return ( +
+ + + + {status === "processing" && ( + <> + + Signing you in... + + )} + {status === "success" && ( + <> + + Welcome to GenDB! + + )} + {status === "error" && ( + <> + + Sign In Failed + + )} + + + {status === "processing" && "Please wait while we complete your sign in..."} + {status === "success" && "You will be redirected shortly."} + {status === "error" && "There was an issue signing you in."} + + + + {status === "error" && ( +
+
+

{error}

+
+ +
+ )} + {status === "success" && ( +
+
+

+ Redirecting you to the app... +

+
+
+ )} +
+
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/app/chat/page.tsx b/frontend/src/app/chat/page.tsx index 5b193e30..3e2153df 100644 --- a/frontend/src/app/chat/page.tsx +++ b/frontend/src/app/chat/page.tsx @@ -5,6 +5,7 @@ import { Button } from "@/components/ui/button" import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" import { Input } from "@/components/ui/input" import { MessageCircle, Send, Loader2, User, Bot, Zap, Settings, ChevronDown, ChevronRight, Upload } from "lucide-react" +import { ProtectedRoute } from "@/components/protected-route" interface Message { role: "user" | "assistant" @@ -43,7 +44,7 @@ interface RequestBody { previous_response_id?: string } -export default function ChatPage() { +function ChatPage() { const [messages, setMessages] = useState([]) const [input, setInput] = useState("") const [loading, setLoading] = useState(false) @@ -1104,4 +1105,12 @@ export default function ChatPage() { ) +} + +export default function ProtectedChatPage() { + return ( + + + + ) } \ No newline at end of file diff --git a/frontend/src/app/connectors/callback/page.tsx b/frontend/src/app/connectors/callback/page.tsx new file mode 100644 index 00000000..f0b5cf1a --- /dev/null +++ b/frontend/src/app/connectors/callback/page.tsx @@ -0,0 +1,203 @@ +"use client" + +import { useEffect, useState } from "react" +import { useRouter, useSearchParams } from "next/navigation" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Button } from "@/components/ui/button" +import { Loader2, CheckCircle, XCircle, ArrowLeft } from "lucide-react" +import { useAuth } from "@/contexts/auth-context" + +export default function ConnectorCallbackPage() { + const router = useRouter() + const searchParams = useSearchParams() + const { refreshAuth } = useAuth() + const [status, setStatus] = useState<"processing" | "success" | "error">("processing") + const [error, setError] = useState(null) + const [connectorType, setConnectorType] = useState(null) + const [isAppAuth, setIsAppAuth] = useState(false) + + useEffect(() => { + const handleCallback = async () => { + try { + // Get parameters from URL + const code = searchParams.get('code') + const state = searchParams.get('state') + const errorParam = searchParams.get('error') + + // Get stored connector info + const connectorId = localStorage.getItem('connecting_connector_id') + const storedConnectorType = localStorage.getItem('connecting_connector_type') + const authPurpose = localStorage.getItem('auth_purpose') + + // Debug logging + console.log('OAuth Callback Debug:', { + urlParams: { code: !!code, state: !!state, error: errorParam }, + localStorage: { connectorId, storedConnectorType, authPurpose }, + fullUrl: window.location.href + }) + + // Use state parameter as connection_id if localStorage is missing + const finalConnectorId = connectorId || state + const finalConnectorType = storedConnectorType || 'app_auth' + const finalAuthPurpose = authPurpose || 'app_auth' + + setConnectorType(finalConnectorType) + setIsAppAuth(finalAuthPurpose === 'app_auth' || finalConnectorType === 'app_auth') + + if (errorParam) { + throw new Error(`OAuth error: ${errorParam}`) + } + + if (!code || !state || !finalConnectorId) { + console.error('Missing OAuth callback parameters:', { + code: !!code, + state: !!state, + finalConnectorId: !!finalConnectorId + }) + throw new Error('Missing required parameters for OAuth callback') + } + + // Send callback data to backend + const 
response = await fetch('/api/auth/callback', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + connection_id: finalConnectorId, + authorization_code: code, + state: state + }), + }) + + const result = await response.json() + + if (response.ok) { + setStatus("success") + + if (result.purpose === 'app_auth' || isAppAuth) { + // App authentication - refresh auth context and redirect to home + await refreshAuth() + + // Clean up localStorage + localStorage.removeItem('connecting_connector_id') + localStorage.removeItem('connecting_connector_type') + localStorage.removeItem('auth_purpose') + + // Redirect to home page after app login + setTimeout(() => { + router.push('/') + }, 2000) + } else { + // Connector authentication - redirect to connectors page + // Clean up localStorage + localStorage.removeItem('connecting_connector_id') + localStorage.removeItem('connecting_connector_type') + localStorage.removeItem('auth_purpose') + + // Redirect to connectors page after a short delay + setTimeout(() => { + router.push('/connectors?oauth_success=true') + }, 2000) + } + } else { + throw new Error(result.error || 'Authentication failed') + } + + } catch (err) { + console.error('OAuth callback error:', err) + setError(err instanceof Error ? err.message : 'Unknown error occurred') + setStatus("error") + + // Clean up localStorage on error too + localStorage.removeItem('connecting_connector_id') + localStorage.removeItem('connecting_connector_type') + localStorage.removeItem('auth_purpose') + } + } + + handleCallback() + }, [searchParams, router, refreshAuth]) + + const getTitle = () => { + if (status === "processing") { + return isAppAuth ? "Signing In..." : "Connecting..." + } + if (status === "success") { + return isAppAuth ? "Sign In Successful!" : "Connection Successful!" + } + if (status === "error") { + return isAppAuth ? "Sign In Failed" : "Connection Failed" + } + } + + const getDescription = () => { + if (status === "processing") { + return isAppAuth ? "Please wait while we sign you in..." : "Please wait while we complete the connection..." + } + if (status === "success") { + return "You will be redirected shortly." + } + if (status === "error") { + return isAppAuth ? "There was an issue signing you in." : "There was an issue with the connection." + } + } + + return ( +
+ + + + {status === "processing" && ( + <> + + {getTitle()} + + )} + {status === "success" && ( + <> + + {getTitle()} + + )} + {status === "error" && ( + <> + + {getTitle()} + + )} + + + {getDescription()} + + + + {status === "error" && ( +
+
+

{error}

+
+ +
+ )} + {status === "success" && ( +
+
+

+ {isAppAuth ? 'Redirecting to home...' : 'Redirecting to connectors...'} +

+
+
+ )} +
+
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/app/connectors/page.tsx b/frontend/src/app/connectors/page.tsx new file mode 100644 index 00000000..a9a996e4 --- /dev/null +++ b/frontend/src/app/connectors/page.tsx @@ -0,0 +1,629 @@ +"use client" + +import { useState, useEffect } from "react" +import { useSearchParams } from "next/navigation" +import { Button } from "@/components/ui/button" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Badge } from "@/components/ui/badge" +import { Input } from "@/components/ui/input" +import { Label } from "@/components/ui/label" +import { Loader2, PlugZap, CheckCircle, XCircle, RefreshCw, FileText, Download, AlertCircle } from "lucide-react" +import { useAuth } from "@/contexts/auth-context" +import { ProtectedRoute } from "@/components/protected-route" + +interface Connector { + id: string + name: string + description: string + icon: React.ReactNode + status: "not_connected" | "connecting" | "connected" | "error" + type: string + connectionId?: string // Store the active connection ID for syncing +} + +interface ConnectorStatus { + authenticated: boolean + status: string + connections: Array<{ + connection_id: string + name: string + is_active: boolean + created_at: string + last_sync?: string + }> +} + +function ConnectorsPage() { + const { user, isAuthenticated } = useAuth() + const searchParams = useSearchParams() + const [connectors, setConnectors] = useState([ + { + id: "google_drive", + name: "Google Drive", + description: "Connect your Google Drive to automatically sync documents", + icon:
G
, + status: "not_connected", + type: "google_drive" + }, + // Future connectors can be added here + // { + // id: "dropbox", + // name: "Dropbox", + // description: "Connect your Dropbox to automatically sync documents", + // icon:
D
, + // status: "not_connected", + // type: "dropbox" + // } + ]) + + const [isConnecting, setIsConnecting] = useState(null) + const [isSyncing, setIsSyncing] = useState(null) + const [syncResults, setSyncResults] = useState<{ [key: string]: any }>({}) + const [syncProgress, setSyncProgress] = useState<{ [key: string]: any }>({}) + const [maxFiles, setMaxFiles] = useState(10) + + // Function definitions first + const checkConnectorStatuses = async () => { + for (const connector of connectors) { + try { + const response = await fetch(`/api/connectors/status/${connector.type}`) + if (response.ok) { + const status: ConnectorStatus = await response.json() + const isConnected = status.authenticated + + // Find the first active connection to use for syncing + const activeConnection = status.connections?.find(conn => conn.is_active) + + setConnectors(prev => prev.map(c => + c.id === connector.id + ? { + ...c, + status: isConnected ? "connected" : "not_connected", + connectionId: activeConnection?.connection_id + } + : c + )) + } + } catch (error) { + console.error(`Failed to check status for ${connector.name}:`, error) + } + } + } + + const refreshConnectorStatus = async (connectorId: string) => { + const connector = connectors.find(c => c.id === connectorId) + if (!connector) return + + try { + const response = await fetch(`/api/connectors/status/${connector.type}`) + if (response.ok) { + const status: ConnectorStatus = await response.json() + const isConnected = status.authenticated + + // Find the first active connection to use for syncing + const activeConnection = status.connections?.find(conn => conn.is_active) + + setConnectors(prev => prev.map(c => + c.id === connectorId + ? { + ...c, + status: isConnected ? "connected" : "not_connected", + connectionId: activeConnection?.connection_id + } + : c + )) + } + } catch (error) { + console.error(`Failed to refresh status for ${connector.name}:`, error) + } + } + + const handleConnect = async (connector: Connector) => { + setIsConnecting(connector.id) + setConnectors(prev => prev.map(c => + c.id === connector.id ? 
{ ...c, status: "connecting" } : c + )) + + try { + // Frontend determines the correct redirect URI using its own origin + const redirectUri = `${window.location.origin}/connectors/callback` + + const response = await fetch('/api/auth/init', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + provider: connector.type.replace('_drive', ''), // "google_drive" -> "google" + purpose: "data_source", + name: `${connector.name} Connection`, + redirect_uri: redirectUri + }), + }) + + const result = await response.json() + + if (response.ok) { + // Store connector ID for callback + localStorage.setItem('connecting_connector_id', result.connection_id) + localStorage.setItem('connecting_connector_type', connector.type) + + // Handle client-side OAuth with Google's library + if (result.oauth_config) { + // Use the redirect URI provided by the backend + const authUrl = `${result.oauth_config.authorization_endpoint}?` + + `client_id=${result.oauth_config.client_id}&` + + `response_type=code&` + + `scope=${result.oauth_config.scopes.join(' ')}&` + + `redirect_uri=${encodeURIComponent(result.oauth_config.redirect_uri)}&` + + `access_type=offline&` + + `prompt=consent&` + + `state=${result.connection_id}` + + window.location.href = authUrl + } + } else { + throw new Error(result.error || 'Failed to initialize OAuth') + } + } catch (error) { + console.error('OAuth initialization failed:', error) + setConnectors(prev => prev.map(c => + c.id === connector.id ? { ...c, status: "error" } : c + )) + } finally { + setIsConnecting(null) + } + } + + const pollTaskStatus = async (taskId: string, connectorId: string) => { + const maxAttempts = 120 // Poll for up to 10 minutes (120 * 5s intervals) + let attempts = 0 + + const poll = async (): Promise => { + try { + attempts++ + + const response = await fetch(`/api/tasks/${taskId}`) + + if (!response.ok) { + throw new Error(`Failed to check task status: ${response.status}`) + } + + const task = await response.json() + + if (task.status === 'completed') { + // Task completed successfully + setSyncResults(prev => ({ + ...prev, + [connectorId]: { + processed: task.total_files || 0, + added: task.successful_files || 0, + skipped: (task.total_files || 0) - (task.successful_files || 0), + errors: task.failed_files || 0 + } + })) + setSyncProgress(prev => ({ ...prev, [connectorId]: null })) + setIsSyncing(null) + + } else if (task.status === 'failed' || task.status === 'error') { + // Task failed + setSyncResults(prev => ({ + ...prev, + [connectorId]: { + error: task.error || 'Sync failed' + } + })) + setSyncProgress(prev => ({ ...prev, [connectorId]: null })) + setIsSyncing(null) + + } else if (task.status === 'pending' || task.status === 'running') { + // Still in progress, update progress and continue polling + const processed = task.processed_files || 0 + const total = task.total_files || 0 + const successful = task.successful_files || 0 + const failed = task.failed_files || 0 + + setSyncProgress(prev => ({ + ...prev, + [connectorId]: { + status: task.status, + processed, + total, + successful, + failed + } + })) + + // Continue polling if we haven't exceeded max attempts + if (attempts < maxAttempts) { + setTimeout(poll, 5000) // Poll every 5 seconds + } else { + setSyncResults(prev => ({ + ...prev, + [connectorId]: { + error: `Sync timeout after ${attempts} attempts. 
The task may still be running in the background.` + } + })) + setSyncProgress(prev => ({ ...prev, [connectorId]: null })) + setIsSyncing(null) + } + + } else { + // Unknown status + setSyncResults(prev => ({ + ...prev, + [connectorId]: { + error: `Unknown task status: ${task.status}` + } + })) + setSyncProgress(prev => ({ ...prev, [connectorId]: null })) + setIsSyncing(null) + } + + } catch (error) { + console.error('Task polling error:', error) + setSyncResults(prev => ({ + ...prev, + [connectorId]: { + error: error instanceof Error ? error.message : 'Failed to check sync status' + } + })) + setSyncProgress(prev => ({ ...prev, [connectorId]: null })) + setIsSyncing(null) + } + } + + // Start polling + await poll() + } + + const handleSync = async (connector: Connector) => { + setIsSyncing(connector.id) + setSyncResults(prev => ({ ...prev, [connector.id]: null })) + setSyncProgress(prev => ({ ...prev, [connector.id]: null })) + + if (!connector.connectionId) { + console.error('No connection ID available for syncing') + setSyncResults(prev => ({ + ...prev, + [connector.id]: { + error: 'No active connection found. Please reconnect and try again.' + } + })) + setIsSyncing(null) + return + } + + try { + const response = await fetch('/api/connectors/sync', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + connection_id: connector.connectionId, + max_files: maxFiles + }), + }) + + const result = await response.json() + + if (response.status === 201 && result.task_id) { + // Async sync started, begin polling for status + setSyncProgress(prev => ({ + ...prev, + [connector.id]: { + status: 'pending', + processed: 0, + total: 0, + successful: 0, + failed: 0 + } + })) + + // Start polling for task status + await pollTaskStatus(result.task_id, connector.id) + + } else if (response.ok) { + // Legacy synchronous response (fallback) + setSyncResults(prev => ({ ...prev, [connector.id]: result })) + setIsSyncing(null) + } else { + throw new Error(result.error || 'Failed to sync') + } + } catch (error) { + console.error('Sync failed:', error) + setSyncResults(prev => ({ + ...prev, + [connector.id]: { + error: error instanceof Error ? error.message : 'Sync failed' + } + })) + setIsSyncing(null) + } + } + + const handleDisconnect = async (connector: Connector) => { + // This would call a disconnect endpoint when implemented + setConnectors(prev => prev.map(c => + c.id === connector.id ? { ...c, status: "not_connected", connectionId: undefined } : c + )) + setSyncResults(prev => ({ ...prev, [connector.id]: null })) + } + + const getStatusIcon = (status: Connector['status']) => { + switch (status) { + case "connected": + return + case "connecting": + return + case "error": + return + default: + return + } + } + + const getStatusBadge = (status: Connector['status']) => { + switch (status) { + case "connected": + return Connected + case "connecting": + return Connecting... + case "error": + return Error + default: + return Not Connected + } + } + + // Check connector status on mount and when returning from OAuth + useEffect(() => { + if (isAuthenticated) { + checkConnectorStatuses() + } + + // If we just returned from OAuth, clear the URL parameter + if (searchParams.get('oauth_success') === 'true') { + // Clear the URL parameter without causing a page reload + const url = new URL(window.location.href) + url.searchParams.delete('oauth_success') + window.history.replaceState({}, '', url.toString()) + } + }, [searchParams, isAuthenticated]) + + return ( +
+
+

Connectors

+

+ Connect external services to automatically sync and index your documents +

+
+ + {/* Sync Settings */} + + + + + Sync Settings + + + Configure how many files to sync when manually triggering a sync + + + +
+
+ + setMaxFiles(parseInt(e.target.value) || 10)} + className="w-24" + min="1" + max="100" + /> + + (Leave blank or set to 0 for unlimited) + +
+
+
+
+ + {/* Connectors Grid */} +
+ {connectors.map((connector) => ( + + +
+
+ {connector.icon} +
+ {connector.name} +
+ {getStatusIcon(connector.status)} + {getStatusBadge(connector.status)} +
+
+
+
+ + {connector.description} + +
+ +
+ {connector.status === "not_connected" && ( + + )} + + {connector.status === "connected" && ( + <> + + + + )} + + {connector.status === "error" && ( + + )} +
+ + {/* Sync Results and Progress */} + {(syncResults[connector.id] || syncProgress[connector.id]) && ( +
+ {syncProgress[connector.id] && ( +
+
+ + Sync in Progress +
+
+
Status: {syncProgress[connector.id].status}
+ {syncProgress[connector.id].total > 0 && ( + <> +
Progress: {syncProgress[connector.id].processed}/{syncProgress[connector.id].total} files
+
Successful: {syncProgress[connector.id].successful}
+ {syncProgress[connector.id].failed > 0 && ( +
+ Failed: {syncProgress[connector.id].failed} +
+ )} + + )} +
+
+ )} + + {syncResults[connector.id] && !syncProgress[connector.id] && ( + <> + {syncResults[connector.id].error ? ( +
+
Sync Failed
+
{syncResults[connector.id].error}
+
+ ) : ( +
+
+ + Sync Completed +
+
+
Processed: {syncResults[connector.id].processed || 0} files
+
Added: {syncResults[connector.id].added || 0} documents
+
Skipped: {syncResults[connector.id].skipped || 0} files
+ {syncResults[connector.id].errors > 0 && ( +
+ Errors: {syncResults[connector.id].errors} +
+ )} +
+
+ )} + + )} +
+ )} +
+
+ ))} +
+ + {/* Coming Soon Section */} + + + Coming Soon + + Additional connectors are in development + + + +
+
+
D
+
+
Dropbox
+
File storage
+
+
+
+
O
+
+
OneDrive
+
Microsoft cloud storage
+
+
+
+
B
+
+
Box
+
Enterprise file sharing
+
+
+
+
+
+
+ ) +} + +export default function ProtectedConnectorsPage() { + return ( + + + + ) +} \ No newline at end of file diff --git a/frontend/src/app/layout.tsx b/frontend/src/app/layout.tsx index 22a031f4..03412d7b 100644 --- a/frontend/src/app/layout.tsx +++ b/frontend/src/app/layout.tsx @@ -4,6 +4,9 @@ import "./globals.css"; import { ThemeProvider } from "@/components/theme-provider"; import { Navigation } from "@/components/navigation"; import { ModeToggle } from "@/components/mode-toggle"; +import { AuthProvider } from "@/contexts/auth-context"; +import { UserNav } from "@/components/user-nav"; +import { LayoutWrapper } from "@/components/layout-wrapper"; const geistSans = Geist({ variable: "--font-geist-sans", @@ -36,34 +39,11 @@ export default function RootLayout({ enableSystem disableTransitionOnChange > -
-
-
-
-

- GenDB -

-
-
- -
-
-
-
- -
-
-
-
-
- {children} -
-
-
-
-
+ + + {children} + + diff --git a/frontend/src/app/login/page.tsx b/frontend/src/app/login/page.tsx new file mode 100644 index 00000000..fca2c5b0 --- /dev/null +++ b/frontend/src/app/login/page.tsx @@ -0,0 +1,65 @@ +"use client" + +import { useEffect } from "react" +import { useRouter, useSearchParams } from "next/navigation" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Button } from "@/components/ui/button" +import { useAuth } from "@/contexts/auth-context" +import { Lock, LogIn, Loader2 } from "lucide-react" + +export default function LoginPage() { + const { isLoading, isAuthenticated, login } = useAuth() + const router = useRouter() + const searchParams = useSearchParams() + const redirect = searchParams.get('redirect') || '/' + + // Redirect if already authenticated + useEffect(() => { + if (!isLoading && isAuthenticated) { + router.push(redirect) + } + }, [isLoading, isAuthenticated, router, redirect]) + + if (isLoading) { + return ( +
+
+ +

Loading...

+
+
+ ) + } + + if (isAuthenticated) { + return null // Will redirect in useEffect + } + + return ( +
+ + +
+ +
+
+ Welcome to GenDB + + Sign in to access your documents and AI chat + +
+
+ + + +

+ By signing in, you agree to our terms of service and privacy policy. +

+
+
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/app/page.tsx b/frontend/src/app/page.tsx index 2887530e..9f1c41d5 100644 --- a/frontend/src/app/page.tsx +++ b/frontend/src/app/page.tsx @@ -6,6 +6,7 @@ import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/com import { Input } from "@/components/ui/input" import { Label } from "@/components/ui/label" import { Search, Loader2, FileText, Zap } from "lucide-react" +import { ProtectedRoute } from "@/components/protected-route" interface SearchResult { filename: string @@ -15,7 +16,7 @@ interface SearchResult { score: number } -export default function SearchPage() { +function SearchPage() { const [query, setQuery] = useState("") const [loading, setLoading] = useState(false) const [results, setResults] = useState([]) @@ -204,3 +205,11 @@ export default function SearchPage() { ) } + +export default function ProtectedSearchPage() { + return ( + + + + ) +} diff --git a/frontend/src/components/layout-wrapper.tsx b/frontend/src/components/layout-wrapper.tsx new file mode 100644 index 00000000..b0c89e9a --- /dev/null +++ b/frontend/src/components/layout-wrapper.tsx @@ -0,0 +1,56 @@ +"use client" + +import { usePathname } from "next/navigation" +import { Navigation } from "@/components/navigation" +import { ModeToggle } from "@/components/mode-toggle" +import { UserNav } from "@/components/user-nav" + +export function LayoutWrapper({ children }: { children: React.ReactNode }) { + const pathname = usePathname() + + // List of paths that should not show navigation + const authPaths = ['/login', '/auth/callback'] + const isAuthPage = authPaths.includes(pathname) + + if (isAuthPage) { + // For auth pages, render without navigation + return ( +
+ {children} +
+ ) + } + + // For all other pages, render with full navigation + return ( +
+
+
+
+

+ GenDB +

+
+
+ +
+
+
+
+ +
+
+
+
+
+ {children} +
+
+
+
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/components/login-required.tsx b/frontend/src/components/login-required.tsx new file mode 100644 index 00000000..c5fa5f44 --- /dev/null +++ b/frontend/src/components/login-required.tsx @@ -0,0 +1,42 @@ +"use client" + +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Button } from "@/components/ui/button" +import { useAuth } from "@/contexts/auth-context" +import { Lock, LogIn } from "lucide-react" + +interface LoginRequiredProps { + title?: string + description?: string + feature?: string +} + +export function LoginRequired({ + title = "Authentication Required", + description = "You need to sign in to access this feature", + feature +}: LoginRequiredProps) { + const { login } = useAuth() + + return ( +
+ + +
+ +
+ {title} + + {feature ? `You need to sign in to access ${feature}` : description} + +
+ + + +
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/components/protected-route.tsx b/frontend/src/components/protected-route.tsx new file mode 100644 index 00000000..04719462 --- /dev/null +++ b/frontend/src/components/protected-route.tsx @@ -0,0 +1,44 @@ +"use client" + +import { useEffect } from "react" +import { useRouter, usePathname } from "next/navigation" +import { useAuth } from "@/contexts/auth-context" +import { Loader2 } from "lucide-react" + +interface ProtectedRouteProps { + children: React.ReactNode +} + +export function ProtectedRoute({ children }: ProtectedRouteProps) { + const { isLoading, isAuthenticated } = useAuth() + const router = useRouter() + const pathname = usePathname() + + useEffect(() => { + if (!isLoading && !isAuthenticated) { + // Redirect to login with current path as redirect parameter + const redirectUrl = `/login?redirect=${encodeURIComponent(pathname)}` + router.push(redirectUrl) + } + }, [isLoading, isAuthenticated, router, pathname]) + + // Show loading state while checking authentication + if (isLoading) { + return ( +
+
+ +

Loading...

+
+
+ ) + } + + // Don't render anything if not authenticated (will redirect) + if (!isAuthenticated) { + return null + } + + // Render protected content + return <>{children} +} \ No newline at end of file diff --git a/frontend/src/components/ui/avatar.tsx b/frontend/src/components/ui/avatar.tsx new file mode 100644 index 00000000..ee02f9e7 --- /dev/null +++ b/frontend/src/components/ui/avatar.tsx @@ -0,0 +1,50 @@ +"use client" + +import * as React from "react" +import * as AvatarPrimitive from "@radix-ui/react-avatar" + +import { cn } from "@/lib/utils" + +const Avatar = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + +)) +Avatar.displayName = AvatarPrimitive.Root.displayName + +const AvatarImage = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + +)) +AvatarImage.displayName = AvatarPrimitive.Image.displayName + +const AvatarFallback = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + +)) +AvatarFallback.displayName = AvatarPrimitive.Fallback.displayName + +export { Avatar, AvatarImage, AvatarFallback } \ No newline at end of file diff --git a/frontend/src/components/ui/dropdown-menu.tsx b/frontend/src/components/ui/dropdown-menu.tsx new file mode 100644 index 00000000..a0a8cdba --- /dev/null +++ b/frontend/src/components/ui/dropdown-menu.tsx @@ -0,0 +1,200 @@ +"use client" + +import * as React from "react" +import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu" +import { Check, ChevronRight, Circle } from "lucide-react" + +import { cn } from "@/lib/utils" + +const DropdownMenu = DropdownMenuPrimitive.Root + +const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger + +const DropdownMenuGroup = DropdownMenuPrimitive.Group + +const DropdownMenuPortal = DropdownMenuPrimitive.Portal + +const DropdownMenuSub = DropdownMenuPrimitive.Sub + +const DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup + +const DropdownMenuSubTrigger = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef & { + inset?: boolean + } +>(({ className, inset, children, ...props }, ref) => ( + + {children} + + +)) +DropdownMenuSubTrigger.displayName = + DropdownMenuPrimitive.SubTrigger.displayName + +const DropdownMenuSubContent = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + +)) +DropdownMenuSubContent.displayName = + DropdownMenuPrimitive.SubContent.displayName + +const DropdownMenuContent = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, sideOffset = 4, ...props }, ref) => ( + + + +)) +DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName + +const DropdownMenuItem = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef & { + inset?: boolean + } +>(({ className, inset, ...props }, ref) => ( + +)) +DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName + +const DropdownMenuCheckboxItem = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, children, checked, ...props }, ref) => ( + + + + + + + {children} + +)) +DropdownMenuCheckboxItem.displayName = + DropdownMenuPrimitive.CheckboxItem.displayName + +const DropdownMenuRadioItem = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, children, ...props }, ref) => ( + + + + + + + {children} + +)) 
+DropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName + +const DropdownMenuLabel = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef & { + inset?: boolean + } +>(({ className, inset, ...props }, ref) => ( + +)) +DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName + +const DropdownMenuSeparator = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + +)) +DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName + +const DropdownMenuShortcut = ({ + className, + ...props +}: React.HTMLAttributes) => { + return ( + + ) +} +DropdownMenuShortcut.displayName = "DropdownMenuShortcut" + +export { + DropdownMenu, + DropdownMenuTrigger, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuCheckboxItem, + DropdownMenuRadioItem, + DropdownMenuLabel, + DropdownMenuSeparator, + DropdownMenuShortcut, + DropdownMenuGroup, + DropdownMenuPortal, + DropdownMenuSub, + DropdownMenuSubContent, + DropdownMenuSubTrigger, + DropdownMenuRadioGroup, +} \ No newline at end of file diff --git a/frontend/src/components/user-nav.tsx b/frontend/src/components/user-nav.tsx new file mode 100644 index 00000000..07c899be --- /dev/null +++ b/frontend/src/components/user-nav.tsx @@ -0,0 +1,68 @@ +"use client" + +import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar" +import { Button } from "@/components/ui/button" +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuLabel, + DropdownMenuSeparator, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu" +import { useAuth } from "@/contexts/auth-context" +import { LogIn, LogOut, User } from "lucide-react" + +export function UserNav() { + const { user, isLoading, isAuthenticated, login, logout } = useAuth() + + if (isLoading) { + return ( +
+ ) + } + + if (!isAuthenticated) { + return ( + + ) + } + + return ( + + + + + + +
+

{user?.name}

+

+ {user?.email} +

+
+
+ + + + Log out + +
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/contexts/auth-context.tsx b/frontend/src/contexts/auth-context.tsx new file mode 100644 index 00000000..aafb80b2 --- /dev/null +++ b/frontend/src/contexts/auth-context.tsx @@ -0,0 +1,146 @@ +"use client" + +import React, { createContext, useContext, useState, useEffect, ReactNode } from 'react' + +interface User { + user_id: string + email: string + name: string + picture?: string + provider: string + last_login?: string +} + +interface AuthContextType { + user: User | null + isLoading: boolean + isAuthenticated: boolean + login: () => void + logout: () => Promise + refreshAuth: () => Promise +} + +const AuthContext = createContext(undefined) + +export function useAuth() { + const context = useContext(AuthContext) + if (context === undefined) { + throw new Error('useAuth must be used within an AuthProvider') + } + return context +} + +interface AuthProviderProps { + children: ReactNode +} + +export function AuthProvider({ children }: AuthProviderProps) { + const [user, setUser] = useState(null) + const [isLoading, setIsLoading] = useState(true) + + const checkAuth = async () => { + try { + const response = await fetch('/api/auth/me') + const data = await response.json() + + if (data.authenticated && data.user) { + setUser(data.user) + } else { + setUser(null) + } + } catch (error) { + console.error('Auth check failed:', error) + setUser(null) + } finally { + setIsLoading(false) + } + } + + const login = () => { + // Use the correct auth callback URL, not connectors callback + const redirectUri = `${window.location.origin}/auth/callback` + + console.log('Starting login with redirect URI:', redirectUri) + + fetch('/api/auth/init', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + provider: 'google', + purpose: 'app_auth', + name: 'App Authentication', + redirect_uri: redirectUri + }), + }) + .then(response => response.json()) + .then(result => { + console.log('Auth init response:', result) + + if (result.oauth_config) { + // Store that this is for app authentication + localStorage.setItem('auth_purpose', 'app_auth') + localStorage.setItem('connecting_connector_id', result.connection_id) + localStorage.setItem('connecting_connector_type', 'app_auth') + + console.log('Stored localStorage items:', { + auth_purpose: localStorage.getItem('auth_purpose'), + connecting_connector_id: localStorage.getItem('connecting_connector_id'), + connecting_connector_type: localStorage.getItem('connecting_connector_type') + }) + + const authUrl = `${result.oauth_config.authorization_endpoint}?` + + `client_id=${result.oauth_config.client_id}&` + + `response_type=code&` + + `scope=${result.oauth_config.scopes.join(' ')}&` + + `redirect_uri=${encodeURIComponent(result.oauth_config.redirect_uri)}&` + + `access_type=offline&` + + `prompt=consent&` + + `state=${result.connection_id}` + + console.log('Redirecting to OAuth URL:', authUrl) + window.location.href = authUrl + } else { + console.error('No oauth_config in response:', result) + } + }) + .catch(error => { + console.error('Login failed:', error) + }) + } + + const logout = async () => { + try { + await fetch('/api/auth/logout', { + method: 'POST', + }) + setUser(null) + } catch (error) { + console.error('Logout failed:', error) + } + } + + const refreshAuth = async () => { + await checkAuth() + } + + useEffect(() => { + checkAuth() + }, []) + + const value: AuthContextType = { + user, + isLoading, + isAuthenticated: !!user, + login, + logout, + 
refreshAuth, + } + + return ( + + {children} + + ) +} \ No newline at end of file diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json index d8b93235..47b1b61e 100644 --- a/frontend/tsconfig.json +++ b/frontend/tsconfig.json @@ -19,7 +19,7 @@ } ], "paths": { - "@/*": ["./*"] + "@/*": ["./src/*", "./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], diff --git a/pyproject.toml b/pyproject.toml index ceca68e3..dfcf25f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,12 @@ dependencies = [ "agentd>=0.2.1", "aiofiles>=24.1.0", "docling>=2.41.0", + "google-api-python-client>=2.143.0", + "google-auth-httplib2>=0.2.0", + "google-auth-oauthlib>=1.2.0", + "httpx>=0.27.0", "opensearch-py[async]>=3.0.0", + "pyjwt>=2.8.0", "python-multipart>=0.0.20", "starlette>=0.47.1", "torch>=2.7.1", diff --git a/src/agent.py b/src/agent.py index 93eeca1c..5b3a896d 100644 --- a/src/agent.py +++ b/src/agent.py @@ -1,10 +1,14 @@ -messages = [{"role": "system", "content": "You are a helpful assistant. Always use the search_tools to answer questions."}] +# User-scoped conversation state - keyed by user_id +user_conversations = {} # user_id -> {"messages": [...], "previous_response_id": None} -# Simple session store for conversation state -conversation_state = { - "messages": messages, - "previous_response_id": None -} +def get_user_conversation(user_id: str): + """Get or create conversation state for a user""" + if user_id not in user_conversations: + user_conversations[user_id] = { + "messages": [{"role": "system", "content": "You are a helpful assistant. Always use the search_tools to answer questions."}], + "previous_response_id": None + } + return user_conversations[user_id] # Generic async response function for streaming async def async_response_stream(client, prompt: str, model: str, previous_response_id: str = None, log_prefix: str = "response"): @@ -90,10 +94,6 @@ async def async_response(client, prompt: str, model: str, previous_response_id: # Extract and store response_id if available response_id = getattr(response, 'id', None) or getattr(response, 'response_id', None) - if response_id: - global conversation_state - conversation_state["previous_response_id"] = response_id - print(f"Stored response_id: {response_id}") return response_text, response_id @@ -122,8 +122,8 @@ async def async_langflow_stream(langflow_client, flow_id: str, prompt: str, prev raise # Async chat function (non-streaming only) -async def async_chat(async_client, prompt: str, model: str = "gpt-4.1-mini", previous_response_id: str = None): - global conversation_state +async def async_chat(async_client, prompt: str, user_id: str, model: str = "gpt-4.1-mini", previous_response_id: str = None): + conversation_state = get_user_conversation(user_id) # If no previous_response_id is provided, reset conversation state if previous_response_id is None: @@ -138,11 +138,16 @@ async def async_chat(async_client, prompt: str, model: str = "gpt-4.1-mini", pre # Add assistant response to conversation conversation_state["messages"].append({"role": "assistant", "content": response_text}) + # Store response_id for this user's conversation + if response_id: + conversation_state["previous_response_id"] = response_id + print(f"Stored response_id for user {user_id}: {response_id}") + return response_text, response_id # Async chat function for streaming (alias for compatibility) -async def async_chat_stream(async_client, prompt: str, model: str = "gpt-4.1-mini", previous_response_id: str = None): - global 
+async def async_chat_stream(async_client, prompt: str, user_id: str, model: str = "gpt-4.1-mini", previous_response_id: str = None):
+    conversation_state = get_user_conversation(user_id)
 
     # If no previous_response_id is provided, reset conversation state
     if previous_response_id is None:
diff --git a/src/app.py b/src/app.py
index 285de4e4..1056fe0d 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,5 +1,5 @@
 # app.py
-
+import datetime
 import os
 from collections import defaultdict
 from typing import Any
@@ -13,6 +13,12 @@ import multiprocessing
 
 from agent import async_chat, async_langflow
 
+# Import connector components
+from connectors.service import ConnectorService
+from connectors.google_drive import GoogleDriveConnector
+from session_manager import SessionManager
+from auth_middleware import require_auth, optional_auth
+
 import hashlib
 import tempfile
 import asyncio
@@ -91,7 +97,22 @@ index_body = {
                     "m": 16
                 }
             }
-        }
+        },
+        # Connector and source information
+        "source_url": { "type": "keyword" },
+        "connector_type": { "type": "keyword" },
+        # ACL fields
+        "owner": { "type": "keyword" },
+        "allowed_users": { "type": "keyword" },
+        "allowed_groups": { "type": "keyword" },
+        "user_permissions": { "type": "object" },
+        "group_permissions": { "type": "object" },
+        # Timestamps
+        "created_time": { "type": "date" },
+        "modified_time": { "type": "date" },
+        "indexed_time": { "type": "date" },
+        # Additional metadata
+        "metadata": { "type": "object" }
         }
     }
 }
@@ -102,6 +123,22 @@ langflow_client = AsyncOpenAI(
 )
 
 patched_async_client = patch_openai_with_mcp(AsyncOpenAI())  # Get the patched client back
 
+# Initialize connector service
+connector_service = ConnectorService(
+    opensearch_client=opensearch,
+    patched_async_client=patched_async_client,
+    process_pool=None,  # Will be set after process_pool is initialized
+    embed_model=EMBED_MODEL,
+    index_name=INDEX_NAME
+)
+
+# Initialize session manager
+session_secret = os.getenv("SESSION_SECRET", "your-secret-key-change-in-production")
+session_manager = SessionManager(session_secret)
+
+# Track used authorization codes to prevent duplicate usage
+used_auth_codes = set()
+
 class TaskStatus(Enum):
     PENDING = "pending"
     RUNNING = "running"
@@ -130,7 +167,7 @@ class UploadTask:
     created_at: float = field(default_factory=time.time)
     updated_at: float = field(default_factory=time.time)
 
-task_store = {}
+task_store = {}  # user_id -> {task_id -> UploadTask}
 background_tasks = set()
 
 # GPU device detection
@@ -171,6 +208,7 @@ else:
 
 MAX_WORKERS = int(os.getenv("MAX_WORKERS", DEFAULT_WORKERS))
 process_pool = ProcessPoolExecutor(max_workers=MAX_WORKERS)
+connector_service.process_pool = process_pool  # Set the process pool for connector service
 
 print(f"Process pool initialized with {MAX_WORKERS} workers")
 
@@ -305,33 +343,60 @@ async def init_index():
     else:
         print(f"Index '{INDEX_NAME}' already exists, skipping creation.")
 
+from collections import defaultdict
+
+
 def extract_relevant(doc_dict: dict) -> dict:
     """
     Given the full export_to_dict() result:
       - Grabs origin metadata (hash, filename, mimetype)
       - Finds every text fragment in `texts`, groups them by page_no
-      - Concatenates each page’s fragments into one string chunk
-    Returns a slimmed dict ready for indexing.
+      - Flattens tables in `tables` into tab-separated text, grouping by row
+      - Concatenates each page’s fragments and each table into its own chunk
+    Returns a slimmed dict ready for indexing, with each chunk under "text".
""" origin = doc_dict.get("origin", {}) - texts = doc_dict.get("texts", []) + chunks = [] - # Group all text fragments by page number + # 1) process free-text fragments page_texts = defaultdict(list) - for txt in texts: - # Each txt['prov'][0]['page_no'] tells you which page it came from + for txt in doc_dict.get("texts", []): prov = txt.get("prov", []) page_no = prov[0].get("page_no") if prov else None if page_no is not None: page_texts[page_no].append(txt.get("text", "").strip()) - # Build an ordered list of {page, text} - chunks = [] for page in sorted(page_texts): - joined = "\n".join(page_texts[page]) chunks.append({ "page": page, - "text": joined + "type": "text", + "text": "\n".join(page_texts[page]) + }) + + # 2) process tables + for t_idx, table in enumerate(doc_dict.get("tables", [])): + prov = table.get("prov", []) + page_no = prov[0].get("page_no") if prov else None + + # group cells by their row index + rows = defaultdict(list) + for cell in table.get("data").get("table_cells", []): + r = cell.get("start_row_offset_idx") + c = cell.get("start_col_offset_idx") + text = cell.get("text", "").strip() + rows[r].append((c, text)) + + # build a tab‑separated line for each row, in order + flat_rows = [] + for r in sorted(rows): + cells = [txt for _, txt in sorted(rows[r], key=lambda x: x[0])] + flat_rows.append("\t".join(cells)) + + chunks.append({ + "page": page_no, + "type": "table", + "table_index": t_idx, + "text": "\n".join(flat_rows) }) return { @@ -361,7 +426,7 @@ async def process_file_with_retry(file_path: str, max_retries: int = 3) -> dict: else: raise last_error -async def process_file_common(file_path: str, file_hash: str = None): +async def process_file_common(file_path: str, file_hash: str = None, owner_user_id: str = None): """ Common processing logic for both upload and upload_path. 1. Optionally compute SHA256 hash if not provided. 
@@ -402,7 +467,9 @@ async def process_file_common(file_path: str, file_hash: str = None):
                 "mimetype": slim_doc["mimetype"],
                 "page": chunk["page"],
                 "text": chunk["text"],
-                "chunk_embedding": vect
+                "chunk_embedding": vect,
+                "owner": owner_user_id,  # User who uploaded/owns this document
+                "indexed_time": datetime.datetime.now().isoformat()
             }
             chunk_id = f"{file_hash}_{i}"
             await opensearch.index(index=INDEX_NAME, id=chunk_id, body=chunk_doc)
@@ -475,10 +542,10 @@ async def process_single_file_task(upload_task: UploadTask, file_path: str) -> N
         if upload_task.processed_files >= upload_task.total_files:
             upload_task.status = TaskStatus.COMPLETED
 
-async def background_upload_processor(task_id: str) -> None:
+async def background_upload_processor(user_id: str, task_id: str) -> None:
     """Background task to process all files in an upload job with concurrency control"""
     try:
-        upload_task = task_store[task_id]
+        upload_task = task_store[user_id][task_id]
         upload_task.status = TaskStatus.RUNNING
         upload_task.updated_at = time.time()
 
@@ -500,10 +567,11 @@ async def background_upload_processor(task_id: str) -> None:
         print(f"[ERROR] Background upload processor failed for task {task_id}: {e}")
         import traceback
         traceback.print_exc()
-        if task_id in task_store:
-            task_store[task_id].status = TaskStatus.FAILED
-            task_store[task_id].updated_at = time.time()
+        if user_id in task_store and task_id in task_store[user_id]:
+            task_store[user_id][task_id].status = TaskStatus.FAILED
+            task_store[user_id][task_id].updated_at = time.time()
 
+@require_auth(session_manager)
 async def upload(request: Request):
     form = await request.form()
     upload_file = form["file"]
@@ -524,13 +592,15 @@ async def upload(request: Request):
         if exists:
             return JSONResponse({"status": "unchanged", "id": file_hash})
 
-        result = await process_file_common(tmp.name, file_hash)
+        user = request.state.user
+        result = await process_file_common(tmp.name, file_hash, owner_user_id=user.user_id)
         return JSONResponse(result)
 
     finally:
         tmp.close()
         os.remove(tmp.name)
 
+@require_auth(session_manager)
 async def upload_path(request: Request):
     payload = await request.json()
     base_dir = payload.get("path")
@@ -551,9 +621,12 @@ async def upload_path(request: Request):
         file_tasks={path: FileTask(file_path=path) for path in file_paths}
     )
 
-    task_store[task_id] = upload_task
+    user = request.state.user
+    if user.user_id not in task_store:
+        task_store[user.user_id] = {}
+    task_store[user.user_id][task_id] = upload_task
 
-    background_task = asyncio.create_task(background_upload_processor(task_id))
+    background_task = asyncio.create_task(background_upload_processor(user.user_id, task_id))
     background_tasks.add(background_task)
     background_task.add_done_callback(background_tasks.discard)
 
@@ -563,6 +636,7 @@ async def upload_path(request: Request):
         "status": "accepted"
     }, status_code=201)
 
+@require_auth(session_manager)
 async def upload_context(request: Request):
     """Upload a file and add its content as context to the current conversation"""
    import io
@@ -619,14 +693,19 @@ async def upload_context(request: Request):
 
     return JSONResponse(response_data)
 
+@require_auth(session_manager)
 async def task_status(request: Request):
     """Get the status of an upload task"""
     task_id = request.path_params.get("task_id")
 
-    if not task_id or task_id not in task_store:
+    user = request.state.user
+
+    if (not task_id or
+        user.user_id not in task_store or
+        task_id not in task_store[user.user_id]):
         return JSONResponse({"error": "Task not found"}, status_code=404)
 
-    upload_task = task_store[task_id]
upload_task = task_store[user.user_id][task_id]

     file_statuses = {}
     for file_path, file_task in upload_task.file_tasks.items():
@@ -651,63 +730,99 @@ async def task_status(request: Request):
         "files": file_statuses
     })

+@require_auth(session_manager)
 async def search(request: Request):
     payload = await request.json()
     query = payload.get("query")
     if not query:
         return JSONResponse({"error": "Query is required"}, status_code=400)
-    return JSONResponse(await search_tool(query))
+
+    user = request.state.user
+    return JSONResponse(await search_tool(query, user_id=user.user_id))

 @tool
-async def search_tool(query: str)-> dict[str, Any]:
+async def search_tool(query: str, user_id: str = None) -> dict[str, Any]:
     """
     Use this tool to search for documents relevant to the query.
-    This endpoint accepts POST requests with a query string,
-
     Args:
-        query (str): query string to search the corpus
+        query (str): query string to search the corpus
+        user_id (str): user ID for access control (optional)

     Returns:
-        dict (str, Any)
-        - {"results": [chunks]} on success
+        dict (str, Any): {"results": [chunks]} on success
     """
     # Embed the query
     resp = await patched_async_client.embeddings.create(model=EMBED_MODEL, input=[query])
     query_embedding = resp.data[0].embedding
-    # Search using vector similarity on individual chunks
+
+    # Base query structure
     search_body = {
         "query": {
-            "knn": {
-                "chunk_embedding": {
-                    "vector": query_embedding,
-                    "k": 10
-                }
+            "bool": {
+                "must": [
+                    {
+                        "knn": {
+                            "chunk_embedding": {
+                                "vector": query_embedding,
+                                "k": 10
+                            }
+                        }
+                    }
+                ]
             }
         },
-        "_source": ["filename", "mimetype", "page", "text"],
+        "_source": ["filename", "mimetype", "page", "text", "source_url", "owner", "allowed_users", "allowed_groups"],
         "size": 10
     }
+
+    # Require authentication - no anonymous access to search
+    if not user_id:
+        return {"results": [], "error": "Authentication required"}
+
+    # Authenticated user access control
+    # User can access documents if:
+    # 1. They own the document (owner field matches user_id)
+    # 2. They're in allowed_users list
+    # 3.
Document has no ACL (public documents) + # TODO: Add group access control later + should_clauses = [ + {"term": {"owner": user_id}}, + {"term": {"allowed_users": user_id}}, + {"bool": {"must_not": {"exists": {"field": "owner"}}}} # Public docs + ] + + search_body["query"]["bool"]["should"] = should_clauses + search_body["query"]["bool"]["minimum_should_match"] = 1 + results = await opensearch.search(index=INDEX_NAME, body=search_body) - # Transform results to match expected format + + # Transform results chunks = [] for hit in results["hits"]["hits"]: chunks.append({ "filename": hit["_source"]["filename"], - "mimetype": hit["_source"]["mimetype"], + "mimetype": hit["_source"]["mimetype"], "page": hit["_source"]["page"], "text": hit["_source"]["text"], - "score": hit["_score"] + "score": hit["_score"], + "source_url": hit["_source"].get("source_url"), + "owner": hit["_source"].get("owner") }) return {"results": chunks} +@require_auth(session_manager) async def chat_endpoint(request): data = await request.json() prompt = data.get("prompt", "") previous_response_id = data.get("previous_response_id") stream = data.get("stream", False) + + # Get authenticated user + user = request.state.user + user_id = user.user_id if not prompt: return JSONResponse({"error": "Prompt is required"}, status_code=400) @@ -715,7 +829,7 @@ async def chat_endpoint(request): if stream: from agent import async_chat_stream return StreamingResponse( - async_chat_stream(patched_async_client, prompt, previous_response_id=previous_response_id), + async_chat_stream(patched_async_client, prompt, user_id, previous_response_id=previous_response_id), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", @@ -725,12 +839,13 @@ async def chat_endpoint(request): } ) else: - response_text, response_id = await async_chat(patched_async_client, prompt, previous_response_id=previous_response_id) + response_text, response_id = await async_chat(patched_async_client, prompt, user_id, previous_response_id=previous_response_id) response_data = {"response": response_text} if response_id: response_data["response_id"] = response_id return JSONResponse(response_data) +@require_auth(session_manager) async def langflow_endpoint(request): data = await request.json() prompt = data.get("prompt", "") @@ -766,6 +881,362 @@ async def langflow_endpoint(request): except Exception as e: return JSONResponse({"error": f"Langflow request failed: {str(e)}"}, status_code=500) + +# Authentication endpoints +@optional_auth(session_manager) # Allow both authenticated and non-authenticated users +async def auth_init(request: Request): + """Initialize OAuth flow for authentication or data source connection""" + try: + data = await request.json() + provider = data.get("provider") # "google", "microsoft", etc. 
+ purpose = data.get("purpose", "data_source") # "app_auth" or "data_source" + connection_name = data.get("name", f"{provider}_{purpose}") + redirect_uri = data.get("redirect_uri") # Frontend provides this + + # Get user from authentication if available + user = getattr(request.state, 'user', None) + user_id = user.user_id if user else None + + if provider != "google": + return JSONResponse({"error": "Unsupported provider"}, status_code=400) + + if not redirect_uri: + return JSONResponse({"error": "redirect_uri is required"}, status_code=400) + + # Get OAuth client configuration from environment + google_client_id = os.getenv("GOOGLE_OAUTH_CLIENT_ID") + if not google_client_id: + return JSONResponse({"error": "Google OAuth client ID not configured"}, status_code=500) + + # Create connection configuration + token_file = f"{provider}_{purpose}_{uuid.uuid4().hex[:8]}.json" + config = { + "client_id": google_client_id, + "token_file": token_file, + "provider": provider, + "purpose": purpose, + "redirect_uri": redirect_uri # Store redirect_uri for use in callback + } + + # Create connection in manager + # For data sources, use provider name (e.g. "google_drive") + # For app auth, connector_type doesn't matter since it gets deleted + connector_type = f"{provider}_drive" if purpose == "data_source" else f"{provider}_auth" + connection_id = await connector_service.connection_manager.create_connection( + connector_type=connector_type, + name=connection_name, + config=config, + user_id=user_id + ) + + # Return OAuth configuration for client-side flow + # Include both identity and data access scopes + scopes = [ + # Identity scopes (for app auth) + 'openid', + 'email', + 'profile', + # Data access scopes (for connectors) + 'https://www.googleapis.com/auth/drive.readonly', + 'https://www.googleapis.com/auth/drive.metadata.readonly' + ] + + oauth_config = { + "client_id": google_client_id, + "scopes": scopes, + "redirect_uri": redirect_uri, # Use the redirect_uri from frontend + "authorization_endpoint": + "https://accounts.google.com/o/oauth2/v2/auth", + "token_endpoint": + "https://oauth2.googleapis.com/token" + } + + return JSONResponse({ + "connection_id": connection_id, + "oauth_config": oauth_config + }) + + except Exception as e: + import traceback + traceback.print_exc() + return JSONResponse({"error": f"Failed to initialize OAuth: {str(e)}"}, status_code=500) + + +async def auth_callback(request: Request): + """Handle OAuth callback - exchange authorization code for tokens""" + try: + data = await request.json() + connection_id = data.get("connection_id") + authorization_code = data.get("authorization_code") + state = data.get("state") + + if not all([connection_id, authorization_code]): + return JSONResponse({"error": "Missing required parameters (connection_id, authorization_code)"}, status_code=400) + + # Check if authorization code has already been used + if authorization_code in used_auth_codes: + return JSONResponse({"error": "Authorization code already used"}, status_code=400) + + # Mark code as used to prevent duplicate requests + used_auth_codes.add(authorization_code) + + try: + # Get connection config + connection_config = await connector_service.connection_manager.get_connection(connection_id) + if not connection_config: + return JSONResponse({"error": "Connection not found"}, status_code=404) + + # Exchange authorization code for tokens + import httpx + + # Use the redirect_uri that was stored during auth_init + redirect_uri = connection_config.config.get("redirect_uri") + if not 
redirect_uri: + return JSONResponse({"error": "Redirect URI not found in connection config"}, status_code=400) + + token_url = "https://oauth2.googleapis.com/token" + + token_payload = { + "code": authorization_code, + "client_id": connection_config.config["client_id"], + "client_secret": os.getenv("GOOGLE_OAUTH_CLIENT_SECRET"), # Need this for server-side + "redirect_uri": redirect_uri, + "grant_type": "authorization_code" + } + + async with httpx.AsyncClient() as client: + token_response = await client.post(token_url, data=token_payload) + + if token_response.status_code != 200: + raise Exception(f"Token exchange failed: {token_response.text}") + + token_data = token_response.json() + + # Store tokens in the token file + token_file_data = { + "token": token_data["access_token"], + "refresh_token": token_data.get("refresh_token"), + "scopes": [ + "openid", + "email", + "profile", + "https://www.googleapis.com/auth/drive.readonly", + "https://www.googleapis.com/auth/drive.metadata.readonly" + ] + } + + # Add expiry if provided + if token_data.get("expires_in"): + from datetime import datetime, timedelta + expiry = datetime.now() + timedelta(seconds=int(token_data["expires_in"])) + token_file_data["expiry"] = expiry.isoformat() + + # Save tokens to file + import json + token_file_path = connection_config.config["token_file"] + async with aiofiles.open(token_file_path, 'w') as f: + await f.write(json.dumps(token_file_data, indent=2)) + + # Route based on purpose + purpose = connection_config.config.get("purpose", "data_source") + + if purpose == "app_auth": + # Handle app authentication - create user session + jwt_token = await session_manager.create_user_session(token_data["access_token"]) + + if jwt_token: + # Get the user info to create a persistent Google Drive connection + user_info = await session_manager.get_user_info_from_token(token_data["access_token"]) + user_id = user_info["id"] if user_info else None + + if user_id: + # Convert the temporary auth connection to a persistent Google Drive connection + # Update the connection to be a data source connection with the user_id + await connector_service.connection_manager.update_connection( + connection_id=connection_id, + connector_type="google_drive", + name=f"Google Drive ({user_info.get('email', 'Unknown')})", + user_id=user_id, + config={ + **connection_config.config, + "purpose": "data_source", # Convert to data source + "user_email": user_info.get("email") + } + ) + + response = JSONResponse({ + "status": "authenticated", + "purpose": "app_auth", + "redirect": "/", # Redirect to home page instead of dashboard + "google_drive_connection_id": connection_id # Return connection ID for frontend + }) + else: + # Fallback: delete connection if we can't get user info + await connector_service.connection_manager.delete_connection(connection_id) + response = JSONResponse({ + "status": "authenticated", + "purpose": "app_auth", + "redirect": "/" + }) + + # Set JWT as HTTP-only cookie for security + response.set_cookie( + key="auth_token", + value=jwt_token, + httponly=True, + secure=False, # False for development/testing + samesite="lax", + max_age=7 * 24 * 60 * 60 # 7 days + ) + return response + else: + # Clean up connection if session creation failed + await connector_service.connection_manager.delete_connection(connection_id) + return JSONResponse({"error": "Failed to create user session"}, status_code=500) + else: + # Handle data source connection - keep the connection for syncing + return JSONResponse({ + "status": "authenticated", + 
"connection_id": connection_id, + "purpose": "data_source", + "connector_type": connection_config.connector_type + }) + + except Exception as e: + import traceback + traceback.print_exc() + return JSONResponse({"error": f"OAuth callback failed: {str(e)}"}, status_code=500) + except Exception as e: + import traceback + traceback.print_exc() + return JSONResponse({"error": f"Callback failed: {str(e)}"}, status_code=500) + + +@optional_auth(session_manager) +async def auth_me(request: Request): + """Get current user information""" + user = getattr(request.state, 'user', None) + + if user: + return JSONResponse({ + "authenticated": True, + "user": { + "user_id": user.user_id, + "email": user.email, + "name": user.name, + "picture": user.picture, + "provider": user.provider, + "last_login": user.last_login.isoformat() if user.last_login else None + } + }) + else: + return JSONResponse({ + "authenticated": False, + "user": None + }) + +@require_auth(session_manager) +async def auth_logout(request: Request): + """Logout user by clearing auth cookie""" + response = JSONResponse({ + "status": "logged_out", + "message": "Successfully logged out" + }) + + # Clear the auth cookie + response.delete_cookie( + key="auth_token", + httponly=True, + secure=False, # False for development/testing + samesite="lax" + ) + + return response + + +@require_auth(session_manager) +async def connector_sync(request: Request): + """Sync files from a connector connection""" + data = await request.json() + connection_id = data.get("connection_id") + max_files = data.get("max_files") + + if not connection_id: + return JSONResponse({"error": "connection_id is required"}, status_code=400) + + try: + print(f"[DEBUG] Starting connector sync for connection_id={connection_id}, max_files={max_files}") + + # Verify user owns this connection + user = request.state.user + print(f"[DEBUG] User: {user.user_id}") + + connection_config = await connector_service.connection_manager.get_connection(connection_id) + print(f"[DEBUG] Got connection config: {connection_config is not None}") + + if not connection_config: + return JSONResponse({"error": "Connection not found"}, status_code=404) + + if connection_config.user_id != user.user_id: + return JSONResponse({"error": "Access denied"}, status_code=403) + + print(f"[DEBUG] About to call sync_connector_files") + task_id = await connector_service.sync_connector_files(connection_id, user.user_id, max_files) + print(f"[DEBUG] Got task_id: {task_id}") + + return JSONResponse({ + "task_id": task_id, + "status": "sync_started", + "message": f"Started syncing files from connection {connection_id}" + }, + status_code=201 + ) + + except Exception as e: + import sys + import traceback + + error_msg = f"[ERROR] Connector sync failed: {str(e)}" + print(error_msg, file=sys.stderr, flush=True) + traceback.print_exc(file=sys.stderr) + sys.stderr.flush() + + return JSONResponse({"error": f"Sync failed: {str(e)}"}, status_code=500) + + +@require_auth(session_manager) +async def connector_status(request: Request): + """Get connector status for authenticated user""" + connector_type = request.path_params.get("connector_type", "google_drive") + user = request.state.user + + # Get connections for this connector type and user + connections = await connector_service.connection_manager.list_connections( + user_id=user.user_id, + connector_type=connector_type + ) + + # Check if there are any active connections + active_connections = [conn for conn in connections if conn.is_active] + has_authenticated_connection = 
len(active_connections) > 0 + + return JSONResponse({ + "connector_type": connector_type, + "authenticated": has_authenticated_connection, # For frontend compatibility + "status": "connected" if has_authenticated_connection else "not_connected", + "connections": [ + { + "connection_id": conn.connection_id, + "name": conn.name, + "is_active": conn.is_active, + "created_at": conn.created_at.isoformat(), + "last_sync": conn.last_sync.isoformat() if conn.last_sync else None + } + for conn in connections + ] + }) + + app = Starlette(debug=True, routes=[ Route("/upload", upload, methods=["POST"]), Route("/upload_context", upload_context, methods=["POST"]), @@ -774,6 +1245,13 @@ app = Starlette(debug=True, routes=[ Route("/search", search, methods=["POST"]), Route("/chat", chat_endpoint, methods=["POST"]), Route("/langflow", langflow_endpoint, methods=["POST"]), + # Authentication endpoints + Route("/auth/init", auth_init, methods=["POST"]), + Route("/auth/callback", auth_callback, methods=["POST"]), + Route("/auth/me", auth_me, methods=["GET"]), + Route("/auth/logout", auth_logout, methods=["POST"]), + Route("/connectors/sync", connector_sync, methods=["POST"]), + Route("/connectors/status/{connector_type}", connector_status, methods=["GET"]), ]) if __name__ == "__main__": @@ -782,6 +1260,7 @@ if __name__ == "__main__": async def main(): await init_index() + await connector_service.initialize() # Cleanup process pool on exit def cleanup(): diff --git a/src/auth_middleware.py b/src/auth_middleware.py new file mode 100644 index 00000000..f0e12bfd --- /dev/null +++ b/src/auth_middleware.py @@ -0,0 +1,44 @@ +from starlette.requests import Request +from starlette.responses import JSONResponse +from typing import Optional +from session_manager import User + + +def get_current_user(request: Request, session_manager) -> Optional[User]: + """Extract current user from request cookies""" + auth_token = request.cookies.get("auth_token") + if not auth_token: + return None + + return session_manager.get_user_from_token(auth_token) + + +def require_auth(session_manager): + """Decorator to require authentication for endpoints""" + def decorator(handler): + async def wrapper(request: Request): + user = get_current_user(request, session_manager) + if not user: + return JSONResponse( + {"error": "Authentication required"}, + status_code=401 + ) + + # Add user to request state so handlers can access it + request.state.user = user + return await handler(request) + + return wrapper + return decorator + + +def optional_auth(session_manager): + """Decorator to optionally extract user for endpoints""" + def decorator(handler): + async def wrapper(request: Request): + user = get_current_user(request, session_manager) + request.state.user = user # Can be None + return await handler(request) + + return wrapper + return decorator \ No newline at end of file diff --git a/src/connectors/__init__.py b/src/connectors/__init__.py new file mode 100644 index 00000000..4f25233d --- /dev/null +++ b/src/connectors/__init__.py @@ -0,0 +1,4 @@ +from .base import BaseConnector +from .google_drive import GoogleDriveConnector + +__all__ = ["BaseConnector", "GoogleDriveConnector"] \ No newline at end of file diff --git a/src/connectors/base.py b/src/connectors/base.py new file mode 100644 index 00000000..40415dd9 --- /dev/null +++ b/src/connectors/base.py @@ -0,0 +1,88 @@ +from abc import ABC, abstractmethod +from typing import Dict, List, Any, Optional, AsyncGenerator +from dataclasses import dataclass +from datetime import datetime + + 
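
As a usage reference for the decorators introduced in `src/auth_middleware.py` above: a minimal, hypothetical wiring into a Starlette app. The secret key, route path, and handler are invented for illustration; in `app.py` the `SessionManager` instance and routes already exist.

```python
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette.routing import Route

from auth_middleware import require_auth
from session_manager import SessionManager

# Assumption: in real use the secret comes from the environment, not a literal.
session_manager = SessionManager(secret_key="change-me")

@require_auth(session_manager)
async def whoami(request: Request):
    # The decorator populated request.state.user before this handler runs;
    # unauthenticated requests were already rejected with a 401.
    return JSONResponse({"user_id": request.state.user.user_id})

app = Starlette(routes=[Route("/whoami", whoami, methods=["GET"])])
```

Because `require_auth` reads the `auth_token` cookie and resolves it through the session manager, handlers stay free of token parsing and only see a typed `User` on `request.state`.
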
+@dataclass +class DocumentACL: + """Access Control List information for a document""" + owner: str = None + user_permissions: Dict[str, str] = None # user email -> permission level (read, write, owner) + group_permissions: Dict[str, str] = None # group identifier -> permission level + + def __post_init__(self): + if self.user_permissions is None: + self.user_permissions = {} + if self.group_permissions is None: + self.group_permissions = {} + + @property + def allowed_users(self) -> List[str]: + """Get list of users with any access""" + return list(self.user_permissions.keys()) + + @property + def allowed_groups(self) -> List[str]: + """Get list of groups with any access""" + return list(self.group_permissions.keys()) + + +@dataclass +class ConnectorDocument: + """Document from a connector with metadata""" + id: str + filename: str + mimetype: str + content: bytes + source_url: str + acl: DocumentACL + modified_time: datetime + created_time: datetime + metadata: Dict[str, Any] = None + + def __post_init__(self): + if self.metadata is None: + self.metadata = {} + + +class BaseConnector(ABC): + """Base class for all document connectors""" + + def __init__(self, config: Dict[str, Any]): + self.config = config + self._authenticated = False + + @abstractmethod + async def authenticate(self) -> bool: + """Authenticate with the service""" + pass + + @abstractmethod + async def setup_subscription(self) -> str: + """Set up real-time subscription for file changes. Returns subscription ID.""" + pass + + @abstractmethod + async def list_files(self, page_token: Optional[str] = None) -> Dict[str, Any]: + """List all files. Returns files and next_page_token if any.""" + pass + + @abstractmethod + async def get_file_content(self, file_id: str) -> ConnectorDocument: + """Get file content and metadata""" + pass + + @abstractmethod + async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]: + """Handle webhook notification. Returns list of affected file IDs.""" + pass + + @abstractmethod + async def cleanup_subscription(self, subscription_id: str) -> bool: + """Clean up subscription""" + pass + + @property + def is_authenticated(self) -> bool: + return self._authenticated \ No newline at end of file diff --git a/src/connectors/connection_manager.py b/src/connectors/connection_manager.py new file mode 100644 index 00000000..f4b1cee5 --- /dev/null +++ b/src/connectors/connection_manager.py @@ -0,0 +1,209 @@ +import json +import uuid +import asyncio +import aiofiles +from typing import Dict, List, Any, Optional +from datetime import datetime +from dataclasses import dataclass, asdict +from pathlib import Path + +from .base import BaseConnector +from .google_drive import GoogleDriveConnector + + +@dataclass +class ConnectionConfig: + """Configuration for a connector connection""" + connection_id: str + connector_type: str # "google_drive", "box", etc. 
+ name: str # User-friendly name + config: Dict[str, Any] # Connector-specific config + user_id: Optional[str] = None # For multi-tenant support + created_at: datetime = None + last_sync: Optional[datetime] = None + is_active: bool = True + + def __post_init__(self): + if self.created_at is None: + self.created_at = datetime.now() + + +class ConnectionManager: + """Manages multiple connector connections with persistence""" + + def __init__(self, connections_file: str = "connections.json"): + self.connections_file = Path(connections_file) + self.connections: Dict[str, ConnectionConfig] = {} + self.active_connectors: Dict[str, BaseConnector] = {} + + async def load_connections(self): + """Load connections from persistent storage""" + if self.connections_file.exists(): + async with aiofiles.open(self.connections_file, 'r') as f: + data = json.loads(await f.read()) + + for conn_data in data.get('connections', []): + # Convert datetime strings back to datetime objects + if conn_data.get('created_at'): + conn_data['created_at'] = datetime.fromisoformat(conn_data['created_at']) + if conn_data.get('last_sync'): + conn_data['last_sync'] = datetime.fromisoformat(conn_data['last_sync']) + + config = ConnectionConfig(**conn_data) + self.connections[config.connection_id] = config + + async def save_connections(self): + """Save connections to persistent storage""" + data = { + 'connections': [] + } + + for config in self.connections.values(): + conn_data = asdict(config) + # Convert datetime objects to strings + if conn_data.get('created_at'): + conn_data['created_at'] = conn_data['created_at'].isoformat() + if conn_data.get('last_sync'): + conn_data['last_sync'] = conn_data['last_sync'].isoformat() + data['connections'].append(conn_data) + + async with aiofiles.open(self.connections_file, 'w') as f: + await f.write(json.dumps(data, indent=2)) + + async def create_connection(self, connector_type: str, name: str, config: Dict[str, Any], user_id: Optional[str] = None) -> str: + """Create a new connection configuration""" + connection_id = str(uuid.uuid4()) + + connection_config = ConnectionConfig( + connection_id=connection_id, + connector_type=connector_type, + name=name, + config=config, + user_id=user_id + ) + + self.connections[connection_id] = connection_config + await self.save_connections() + + return connection_id + + async def get_connection(self, connection_id: str) -> Optional[ConnectionConfig]: + """Get connection configuration""" + return self.connections.get(connection_id) + + async def update_connection(self, connection_id: str, connector_type: str = None, name: str = None, + config: Dict[str, Any] = None, user_id: str = None) -> bool: + """Update an existing connection configuration""" + if connection_id not in self.connections: + return False + + connection = self.connections[connection_id] + + # Update fields if provided + if connector_type is not None: + connection.connector_type = connector_type + if name is not None: + connection.name = name + if config is not None: + connection.config = config + if user_id is not None: + connection.user_id = user_id + + await self.save_connections() + return True + + async def list_connections(self, user_id: Optional[str] = None, connector_type: Optional[str] = None) -> List[ConnectionConfig]: + """List connections, optionally filtered by user or connector type""" + connections = list(self.connections.values()) + + if user_id is not None: + connections = [c for c in connections if c.user_id == user_id] + + if connector_type is not None: + connections 
= [c for c in connections if c.connector_type == connector_type] + + return connections + + async def delete_connection(self, connection_id: str) -> bool: + """Delete a connection""" + if connection_id not in self.connections: + return False + + # Clean up active connector if exists + if connection_id in self.active_connectors: + connector = self.active_connectors[connection_id] + # Try to cleanup subscriptions if applicable + try: + if hasattr(connector, 'webhook_channel_id') and connector.webhook_channel_id: + await connector.cleanup_subscription(connector.webhook_channel_id) + except: + pass # Best effort cleanup + + del self.active_connectors[connection_id] + + del self.connections[connection_id] + await self.save_connections() + return True + + async def get_connector(self, connection_id: str) -> Optional[BaseConnector]: + """Get an active connector instance""" + # Return cached connector if available + if connection_id in self.active_connectors: + connector = self.active_connectors[connection_id] + if connector.is_authenticated: + return connector + else: + # Remove unauthenticated connector from cache + del self.active_connectors[connection_id] + + # Try to create and authenticate connector + connection_config = self.connections.get(connection_id) + if not connection_config or not connection_config.is_active: + return None + + connector = self._create_connector(connection_config) + if await connector.authenticate(): + self.active_connectors[connection_id] = connector + return connector + + return None + + def _create_connector(self, config: ConnectionConfig) -> BaseConnector: + """Factory method to create connector instances""" + if config.connector_type == "google_drive": + return GoogleDriveConnector(config.config) + elif config.connector_type == "box": + # Future: BoxConnector(config.config) + raise NotImplementedError("Box connector not implemented yet") + elif config.connector_type == "dropbox": + # Future: DropboxConnector(config.config) + raise NotImplementedError("Dropbox connector not implemented yet") + else: + raise ValueError(f"Unknown connector type: {config.connector_type}") + + async def update_last_sync(self, connection_id: str): + """Update the last sync timestamp for a connection""" + if connection_id in self.connections: + self.connections[connection_id].last_sync = datetime.now() + await self.save_connections() + + async def activate_connection(self, connection_id: str) -> bool: + """Activate a connection""" + if connection_id in self.connections: + self.connections[connection_id].is_active = True + await self.save_connections() + return True + return False + + async def deactivate_connection(self, connection_id: str) -> bool: + """Deactivate a connection""" + if connection_id in self.connections: + self.connections[connection_id].is_active = False + await self.save_connections() + + # Remove from active connectors + if connection_id in self.active_connectors: + del self.active_connectors[connection_id] + + return True + return False \ No newline at end of file diff --git a/src/connectors/google_drive/__init__.py b/src/connectors/google_drive/__init__.py new file mode 100644 index 00000000..e844e7dc --- /dev/null +++ b/src/connectors/google_drive/__init__.py @@ -0,0 +1,4 @@ +from .connector import GoogleDriveConnector +from .oauth import GoogleDriveOAuth + +__all__ = ["GoogleDriveConnector", "GoogleDriveOAuth"] \ No newline at end of file diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py new file mode 100644 index 
00000000..10d44514 --- /dev/null +++ b/src/connectors/google_drive/connector.py @@ -0,0 +1,405 @@ +import asyncio +import io +import os +import uuid +from datetime import datetime +from typing import Dict, List, Any, Optional +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError +from googleapiclient.http import MediaIoBaseDownload + +from ..base import BaseConnector, ConnectorDocument, DocumentACL +from .oauth import GoogleDriveOAuth + + +# Global worker service cache for process pools +_worker_drive_service = None + +def get_worker_drive_service(client_id: str, token_file: str): + """Get or create a Google Drive service instance for this worker process""" + global _worker_drive_service + if _worker_drive_service is None: + print(f"🔧 Initializing Google Drive service in worker process (PID: {os.getpid()})") + + # Create OAuth instance and load credentials in worker + from .oauth import GoogleDriveOAuth + oauth = GoogleDriveOAuth(client_id=client_id, token_file=token_file) + + # Load credentials synchronously in worker + import asyncio + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(oauth.load_credentials()) + _worker_drive_service = oauth.get_service() + print(f"✅ Google Drive service ready in worker process (PID: {os.getpid()})") + finally: + loop.close() + + return _worker_drive_service + + +# Module-level functions for process pool execution (must be pickleable) +def _sync_list_files_worker(client_id, token_file, query, page_token, page_size): + """Worker function for listing files in process pool""" + service = get_worker_drive_service(client_id, token_file) + return service.files().list( + q=query, + pageSize=page_size, + pageToken=page_token, + fields="nextPageToken, files(id, name, mimeType, modifiedTime, createdTime, webViewLink, permissions, owners)" + ).execute() + + +def _sync_get_metadata_worker(client_id, token_file, file_id): + """Worker function for getting file metadata in process pool""" + service = get_worker_drive_service(client_id, token_file) + return service.files().get( + fileId=file_id, + fields="id, name, mimeType, modifiedTime, createdTime, webViewLink, permissions, owners, size" + ).execute() + + +def _sync_download_worker(client_id, token_file, file_id, mime_type, file_size=None): + """Worker function for downloading files in process pool""" + import signal + import time + + # File size limits (in bytes) + MAX_REGULAR_FILE_SIZE = 100 * 1024 * 1024 # 100MB for regular files + MAX_GOOGLE_WORKSPACE_SIZE = 50 * 1024 * 1024 # 50MB for Google Workspace docs (they can't be streamed) + + # Check file size limits + if file_size: + if mime_type.startswith('application/vnd.google-apps.') and file_size > MAX_GOOGLE_WORKSPACE_SIZE: + raise ValueError(f"Google Workspace file too large: {file_size} bytes (max {MAX_GOOGLE_WORKSPACE_SIZE})") + elif not mime_type.startswith('application/vnd.google-apps.') and file_size > MAX_REGULAR_FILE_SIZE: + raise ValueError(f"File too large: {file_size} bytes (max {MAX_REGULAR_FILE_SIZE})") + + # Dynamic timeout based on file size (minimum 60s, 10s per MB, max 300s) + if file_size: + file_size_mb = file_size / (1024 * 1024) + timeout_seconds = min(300, max(60, int(file_size_mb * 10))) + else: + timeout_seconds = 60 # Default timeout if size unknown + + # Set a timeout for the entire download operation + def timeout_handler(signum, frame): + raise TimeoutError(f"File download timed out after {timeout_seconds} seconds") + + signal.signal(signal.SIGALRM, 
timeout_handler) + signal.alarm(timeout_seconds) + + try: + service = get_worker_drive_service(client_id, token_file) + + # For Google native formats, export as PDF + if mime_type.startswith('application/vnd.google-apps.'): + export_format = 'application/pdf' + request = service.files().export_media(fileId=file_id, mimeType=export_format) + else: + # For regular files, download directly + request = service.files().get_media(fileId=file_id) + + # Download file with chunked approach + file_io = io.BytesIO() + downloader = MediaIoBaseDownload(file_io, request, chunksize=1024*1024) # 1MB chunks + + done = False + retry_count = 0 + max_retries = 2 + + while not done and retry_count < max_retries: + try: + status, done = downloader.next_chunk() + retry_count = 0 # Reset retry count on successful chunk + except Exception as e: + retry_count += 1 + if retry_count >= max_retries: + raise e + time.sleep(1) # Brief pause before retry + + return file_io.getvalue() + + finally: + # Cancel the alarm + signal.alarm(0) + + +class GoogleDriveConnector(BaseConnector): + """Google Drive connector with OAuth and webhook support""" + + # Supported file types that can be processed by docling + SUPPORTED_MIMETYPES = { + 'application/pdf', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', # .docx + 'application/msword', # .doc + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', # .pptx + 'application/vnd.ms-powerpoint', # .ppt + 'text/plain', + 'text/html', + 'application/rtf', + # Google Docs native formats - we'll export these + 'application/vnd.google-apps.document', # Google Docs -> PDF + 'application/vnd.google-apps.presentation', # Google Slides -> PDF + 'application/vnd.google-apps.spreadsheet', # Google Sheets -> PDF + } + + def __init__(self, config: Dict[str, Any]): + super().__init__(config) + self.oauth = GoogleDriveOAuth( + client_id=config.get('client_id'), + token_file=config.get('token_file', 'gdrive_token.json') + ) + self.service = None + self.webhook_channel_id = None + + async def authenticate(self) -> bool: + """Authenticate with Google Drive""" + try: + if await self.oauth.is_authenticated(): + self.service = self.oauth.get_service() + self._authenticated = True + return True + return False + except Exception as e: + print(f"Authentication failed: {e}") + return False + + + async def setup_subscription(self) -> str: + """Set up Google Drive push notifications""" + if not self._authenticated: + raise ValueError("Not authenticated") + + # Generate unique channel ID + channel_id = str(uuid.uuid4()) + + # Set up push notification + # Note: This requires a publicly accessible webhook endpoint + webhook_url = self.config.get('webhook_url') + if not webhook_url: + raise ValueError("webhook_url required in config for subscriptions") + + try: + body = { + 'id': channel_id, + 'type': 'web_hook', + 'address': webhook_url, + 'payload': True, + 'expiration': str(int((datetime.now().timestamp() + 86400) * 1000)) # 24 hours + } + + result = self.service.changes().watch( + pageToken=self._get_start_page_token(), + body=body + ).execute() + + self.webhook_channel_id = channel_id + return channel_id + + except HttpError as e: + print(f"Failed to set up subscription: {e}") + raise + + def _get_start_page_token(self) -> str: + """Get the current page token for change notifications""" + return self.service.changes().getStartPageToken().execute()['startPageToken'] + + async def list_files(self, page_token: Optional[str] = None, limit: Optional[int] = None) -> 
Dict[str, Any]: + """List all supported files in Google Drive""" + if not self._authenticated: + raise ValueError("Not authenticated") + + # Build query for supported file types + mimetype_query = " or ".join([f"mimeType='{mt}'" for mt in self.SUPPORTED_MIMETYPES]) + query = f"({mimetype_query}) and trashed=false" + + # Use provided limit or default to 100, max 1000 (Google Drive API limit) + page_size = min(limit or 100, 1000) + + try: + # Run the blocking Google API call in a thread pool to avoid blocking the event loop + import asyncio + loop = asyncio.get_event_loop() + + # Use the same process pool as docling processing + from app import process_pool + results = await loop.run_in_executor( + process_pool, + _sync_list_files_worker, + self.oauth.client_id, + self.oauth.token_file, + query, + page_token, # page_token should come before page_size + page_size + ) + + files = [] + for file in results.get('files', []): + files.append({ + 'id': file['id'], + 'name': file['name'], + 'mimeType': file['mimeType'], + 'modifiedTime': file['modifiedTime'], + 'createdTime': file['createdTime'], + 'webViewLink': file['webViewLink'], + 'permissions': file.get('permissions', []), + 'owners': file.get('owners', []) + }) + + return { + 'files': files, + 'nextPageToken': results.get('nextPageToken') + } + + except HttpError as e: + print(f"Failed to list files: {e}") + raise + + async def get_file_content(self, file_id: str) -> ConnectorDocument: + """Get file content and metadata""" + if not self._authenticated: + raise ValueError("Not authenticated") + + try: + # Get file metadata (run in thread pool to avoid blocking) + import asyncio + loop = asyncio.get_event_loop() + + # Use the same process pool as docling processing + from app import process_pool + file_metadata = await loop.run_in_executor( + process_pool, + _sync_get_metadata_worker, + self.oauth.client_id, + self.oauth.token_file, + file_id + ) + + # Download file content (pass file size for timeout calculation) + file_size = file_metadata.get('size') + if file_size: + file_size = int(file_size) # Ensure it's an integer + content = await self._download_file_content(file_id, file_metadata['mimeType'], file_size) + + # Extract ACL information + acl = self._extract_acl(file_metadata) + + return ConnectorDocument( + id=file_id, + filename=file_metadata['name'], + mimetype=file_metadata['mimeType'], + content=content, + source_url=file_metadata['webViewLink'], + acl=acl, + modified_time=datetime.fromisoformat(file_metadata['modifiedTime'].replace('Z', '+00:00')).replace(tzinfo=None), + created_time=datetime.fromisoformat(file_metadata['createdTime'].replace('Z', '+00:00')).replace(tzinfo=None), + metadata={ + 'size': file_metadata.get('size'), + 'owners': file_metadata.get('owners', []) + } + ) + + except HttpError as e: + print(f"Failed to get file content: {e}") + raise + + async def _download_file_content(self, file_id: str, mime_type: str, file_size: int = None) -> bytes: + """Download file content, converting Google Docs formats if needed""" + + # Download file (run in process pool to avoid blocking) + import asyncio + loop = asyncio.get_event_loop() + + # Use the same process pool as docling processing + from app import process_pool + return await loop.run_in_executor( + process_pool, + _sync_download_worker, + self.oauth.client_id, + self.oauth.token_file, + file_id, + mime_type, + file_size + ) + + def _extract_acl(self, file_metadata: Dict[str, Any]) -> DocumentACL: + """Extract ACL information from file metadata""" + user_permissions = {} 
+ group_permissions = {} + + owner = None + if file_metadata.get('owners'): + owner = file_metadata['owners'][0].get('emailAddress') + + # Process permissions + for perm in file_metadata.get('permissions', []): + email = perm.get('emailAddress') + role = perm.get('role', 'reader') + perm_type = perm.get('type') + + if perm_type == 'user' and email: + user_permissions[email] = role + elif perm_type == 'group' and email: + group_permissions[email] = role + elif perm_type == 'domain': + # Domain-wide permissions - could be treated as a group + domain = perm.get('domain', 'unknown-domain') + group_permissions[f"domain:{domain}"] = role + + return DocumentACL( + owner=owner, + user_permissions=user_permissions, + group_permissions=group_permissions + ) + + async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]: + """Handle Google Drive webhook notification""" + if not self._authenticated: + raise ValueError("Not authenticated") + + # Google Drive sends change notifications + # We need to query for actual changes + try: + page_token = payload.get('pageToken') + if not page_token: + # Get current page token and return empty list + return [] + + # Get list of changes + changes = self.service.changes().list( + pageToken=page_token, + fields="changes(fileId, file(id, name, mimeType, trashed))" + ).execute() + + affected_files = [] + for change in changes.get('changes', []): + file_info = change.get('file', {}) + # Only include supported file types that aren't trashed + if (file_info.get('mimeType') in self.SUPPORTED_MIMETYPES and + not file_info.get('trashed', False)): + affected_files.append(change['fileId']) + + return affected_files + + except HttpError as e: + print(f"Failed to handle webhook: {e}") + return [] + + async def cleanup_subscription(self, subscription_id: str) -> bool: + """Clean up Google Drive subscription""" + if not self._authenticated: + return False + + try: + self.service.channels().stop( + body={ + 'id': subscription_id, + 'resourceId': subscription_id # This might need adjustment based on Google's response + } + ).execute() + return True + except HttpError as e: + print(f"Failed to cleanup subscription: {e}") + return False \ No newline at end of file diff --git a/src/connectors/google_drive/oauth.py b/src/connectors/google_drive/oauth.py new file mode 100644 index 00000000..5d0f6fe6 --- /dev/null +++ b/src/connectors/google_drive/oauth.py @@ -0,0 +1,118 @@ +import os +import json +import asyncio +from typing import Dict, Any, Optional +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import Flow +from googleapiclient.discovery import build +import aiofiles + + +class GoogleDriveOAuth: + """Handles Google Drive OAuth authentication flow""" + + SCOPES = [ + 'https://www.googleapis.com/auth/drive.readonly', + 'https://www.googleapis.com/auth/drive.metadata.readonly' + ] + + def __init__(self, client_id: str = None, token_file: str = "token.json"): + self.client_id = client_id + self.token_file = token_file + self.creds: Optional[Credentials] = None + + async def load_credentials(self) -> Optional[Credentials]: + """Load existing credentials from token file""" + if os.path.exists(self.token_file): + async with aiofiles.open(self.token_file, 'r') as f: + token_data = json.loads(await f.read()) + + # Create credentials from token data + self.creds = Credentials( + token=token_data.get('token'), + refresh_token=token_data.get('refresh_token'), + id_token=token_data.get('id_token'), 
+ token_uri="https://oauth2.googleapis.com/token", + client_id=self.client_id, + client_secret=os.getenv("GOOGLE_OAUTH_CLIENT_SECRET"), # Need for refresh + scopes=token_data.get('scopes', self.SCOPES) + ) + + # Set expiry if available (ensure timezone-naive for Google auth compatibility) + if token_data.get('expiry'): + from datetime import datetime + expiry_dt = datetime.fromisoformat(token_data['expiry']) + # Remove timezone info to make it naive (Google auth expects naive datetimes) + self.creds.expiry = expiry_dt.replace(tzinfo=None) + + # If credentials are expired, refresh them + if self.creds and self.creds.expired and self.creds.refresh_token: + self.creds.refresh(Request()) + await self.save_credentials() + + return self.creds + + async def save_credentials(self): + """Save credentials to token file""" + if self.creds: + async with aiofiles.open(self.token_file, 'w') as f: + await f.write(self.creds.to_json()) + + def create_authorization_url(self, redirect_uri: str) -> str: + """Create authorization URL for OAuth flow""" + flow = Flow.from_client_secrets_file( + self.credentials_file, + scopes=self.SCOPES, + redirect_uri=redirect_uri + ) + + auth_url, _ = flow.authorization_url( + access_type='offline', + include_granted_scopes='true', + prompt='consent' # Force consent to get refresh token + ) + + # Store flow state for later use + self._flow_state = flow.state + self._flow = flow + + return auth_url + + async def handle_authorization_callback(self, authorization_code: str, state: str) -> bool: + """Handle OAuth callback and exchange code for tokens""" + if not hasattr(self, '_flow') or self._flow_state != state: + raise ValueError("Invalid OAuth state") + + # Exchange authorization code for credentials + self._flow.fetch_token(code=authorization_code) + self.creds = self._flow.credentials + + # Save credentials + await self.save_credentials() + + return True + + async def is_authenticated(self) -> bool: + """Check if we have valid credentials""" + if not self.creds: + await self.load_credentials() + + return self.creds and self.creds.valid + + def get_service(self): + """Get authenticated Google Drive service""" + if not self.creds or not self.creds.valid: + raise ValueError("Not authenticated") + + return build('drive', 'v3', credentials=self.creds) + + async def revoke_credentials(self): + """Revoke credentials and delete token file""" + if self.creds: + self.creds.revoke(Request()) + + if os.path.exists(self.token_file): + os.remove(self.token_file) + + self.creds = None \ No newline at end of file diff --git a/src/connectors/service.py b/src/connectors/service.py new file mode 100644 index 00000000..95acafb2 --- /dev/null +++ b/src/connectors/service.py @@ -0,0 +1,258 @@ +import asyncio +import tempfile +import os +from typing import Dict, Any, List, Optional + +from .base import BaseConnector, ConnectorDocument +from .google_drive import GoogleDriveConnector +from .connection_manager import ConnectionManager + + +class ConnectorService: + """Service to manage document connectors and process files""" + + def __init__(self, opensearch_client, patched_async_client, process_pool, embed_model: str, index_name: str): + self.opensearch = opensearch_client + self.openai_client = patched_async_client + self.process_pool = process_pool + self.embed_model = embed_model + self.index_name = index_name + self.connection_manager = ConnectionManager() + + async def initialize(self): + """Initialize the service by loading existing connections""" + await 
self.connection_manager.load_connections() + + async def get_connector(self, connection_id: str) -> Optional[BaseConnector]: + """Get a connector by connection ID""" + return await self.connection_manager.get_connector(connection_id) + + async def process_connector_document(self, document: ConnectorDocument, owner_user_id: str) -> Dict[str, Any]: + """Process a document from a connector using existing processing pipeline""" + + # Create temporary file from document content + with tempfile.NamedTemporaryFile(delete=False, suffix=self._get_file_extension(document.mimetype)) as tmp_file: + tmp_file.write(document.content) + tmp_file.flush() + + try: + # Use existing process_file_common function from app.py with connector document ID + from app import process_file_common + + # Process using the existing pipeline but with connector document metadata + result = await process_file_common( + file_path=tmp_file.name, + file_hash=document.id, # Use connector document ID as hash + owner_user_id=owner_user_id + ) + + # If successfully indexed, update the indexed documents with connector metadata + if result["status"] == "indexed": + # Update all chunks with connector-specific metadata + await self._update_connector_metadata(document, owner_user_id) + + return { + **result, + "filename": document.filename, + "source_url": document.source_url + } + + finally: + # Clean up temporary file + os.unlink(tmp_file.name) + + async def _update_connector_metadata(self, document: ConnectorDocument, owner_user_id: str): + """Update indexed chunks with connector-specific metadata""" + # Find all chunks for this document + query = { + "query": { + "term": {"document_id": document.id} + } + } + + response = await self.opensearch.search(index=self.index_name, body=query) + + # Update each chunk with connector metadata + for hit in response["hits"]["hits"]: + chunk_id = hit["_id"] + update_body = { + "doc": { + "source_url": document.source_url, + "connector_type": "google_drive", # Could be passed as parameter + # Additional ACL info beyond owner (already set by process_file_common) + "allowed_users": document.acl.allowed_users, + "allowed_groups": document.acl.allowed_groups, + "user_permissions": document.acl.user_permissions, + "group_permissions": document.acl.group_permissions, + # Timestamps + "created_time": document.created_time.isoformat(), + "modified_time": document.modified_time.isoformat(), + # Additional metadata + "metadata": document.metadata + } + } + + await self.opensearch.update(index=self.index_name, id=chunk_id, body=update_body) + + def _get_file_extension(self, mimetype: str) -> str: + """Get file extension based on MIME type""" + mime_to_ext = { + 'application/pdf': '.pdf', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx', + 'application/msword': '.doc', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx', + 'application/vnd.ms-powerpoint': '.ppt', + 'text/plain': '.txt', + 'text/html': '.html', + 'application/rtf': '.rtf', + 'application/vnd.google-apps.document': '.pdf', # Exported as PDF + 'application/vnd.google-apps.presentation': '.pdf', + 'application/vnd.google-apps.spreadsheet': '.pdf', + } + return mime_to_ext.get(mimetype, '.bin') + + async def sync_connector_files(self, connection_id: str, user_id: str, max_files: int = None) -> str: + """Sync files from a connector connection using existing task tracking system""" + print(f"[DEBUG] Starting sync for connection {connection_id}, max_files={max_files}") + + 
connector = await self.get_connector(connection_id) + if not connector: + raise ValueError(f"Connection '{connection_id}' not found or not authenticated") + + print(f"[DEBUG] Got connector, authenticated: {connector.is_authenticated}") + + if not connector.is_authenticated: + raise ValueError(f"Connection '{connection_id}' not authenticated") + + # Collect files to process (limited by max_files) + files_to_process = [] + page_token = None + + # Calculate page size to minimize API calls + page_size = min(max_files or 100, 1000) if max_files else 100 + + while True: + # List files from connector with limit + print(f"[DEBUG] Calling list_files with page_size={page_size}, page_token={page_token}") + file_list = await connector.list_files(page_token, limit=page_size) + print(f"[DEBUG] Got {len(file_list.get('files', []))} files") + files = file_list['files'] + + if not files: + break + + for file_info in files: + if max_files and len(files_to_process) >= max_files: + break + files_to_process.append(file_info) + + # Stop if we have enough files or no more pages + if (max_files and len(files_to_process) >= max_files) or not file_list.get('nextPageToken'): + break + + page_token = file_list.get('nextPageToken') + + if not files_to_process: + raise ValueError("No files found to sync") + + # Create upload task using existing task system + import uuid + from app import UploadTask, FileTask, TaskStatus, task_store, background_upload_processor + + task_id = str(uuid.uuid4()) + upload_task = UploadTask( + task_id=task_id, + total_files=len(files_to_process), + file_tasks={f"connector_file_{file_info['id']}": FileTask(file_path=f"connector_file_{file_info['id']}") for file_info in files_to_process} + ) + + # Store task for user + if user_id not in task_store: + task_store[user_id] = {} + task_store[user_id][task_id] = upload_task + + # Start background processing with connector-specific logic + import asyncio + from app import background_tasks + background_task = asyncio.create_task(self._background_connector_sync(user_id, task_id, connection_id, files_to_process)) + background_tasks.add(background_task) + background_task.add_done_callback(background_tasks.discard) + + return task_id + + async def _background_connector_sync(self, user_id: str, task_id: str, connection_id: str, files_to_process: List[Dict]): + """Background task to sync connector files""" + from app import task_store, TaskStatus + import datetime + + try: + upload_task = task_store[user_id][task_id] + upload_task.status = TaskStatus.RUNNING + upload_task.updated_at = datetime.datetime.now().timestamp() + + connector = await self.get_connector(connection_id) + if not connector: + raise ValueError(f"Connection '{connection_id}' not found") + + # Process files with limited concurrency + semaphore = asyncio.Semaphore(4) # Limit concurrent file processing + + async def process_connector_file(file_info): + async with semaphore: + file_key = f"connector_file_{file_info['id']}" + file_task = upload_task.file_tasks[file_key] + file_task.status = TaskStatus.RUNNING + file_task.updated_at = datetime.datetime.now().timestamp() + + try: + # Get file content from connector + document = await connector.get_file_content(file_info['id']) + + # Process using existing pipeline + result = await self.process_connector_document(document, user_id) + + file_task.status = TaskStatus.COMPLETED + file_task.result = result + upload_task.successful_files += 1 + + except Exception as e: + import sys + import traceback + + error_msg = f"[ERROR] Failed to process 
connector file {file_info['id']}: {e}" + print(error_msg, file=sys.stderr, flush=True) + traceback.print_exc(file=sys.stderr) + sys.stderr.flush() + + # Also store full traceback in task error + full_error = f"{str(e)}\n{traceback.format_exc()}" + file_task.status = TaskStatus.FAILED + file_task.error = full_error + upload_task.failed_files += 1 + finally: + file_task.updated_at = datetime.datetime.now().timestamp() + upload_task.processed_files += 1 + upload_task.updated_at = datetime.datetime.now().timestamp() + + # Process all files concurrently + tasks = [process_connector_file(file_info) for file_info in files_to_process] + await asyncio.gather(*tasks, return_exceptions=True) + + # Update connection last sync time + await self.connection_manager.update_last_sync(connection_id) + + upload_task.status = TaskStatus.COMPLETED + upload_task.updated_at = datetime.datetime.now().timestamp() + + except Exception as e: + import sys + import traceback + + error_msg = f"[ERROR] Background connector sync failed for task {task_id}: {e}" + print(error_msg, file=sys.stderr, flush=True) + traceback.print_exc(file=sys.stderr) + sys.stderr.flush() + + if user_id in task_store and task_id in task_store[user_id]: + task_store[user_id][task_id].status = TaskStatus.FAILED + task_store[user_id][task_id].updated_at = datetime.datetime.now().timestamp() \ No newline at end of file diff --git a/src/session_manager.py b/src/session_manager.py new file mode 100644 index 00000000..b133c936 --- /dev/null +++ b/src/session_manager.py @@ -0,0 +1,106 @@ +import json +import jwt +import httpx +from datetime import datetime, timedelta +from typing import Dict, Optional, Any +from dataclasses import dataclass, asdict + + +@dataclass +class User: + """User information from OAuth provider""" + user_id: str # From OAuth sub claim + email: str + name: str + picture: str = None + provider: str = "google" + created_at: datetime = None + last_login: datetime = None + + def __post_init__(self): + if self.created_at is None: + self.created_at = datetime.now() + if self.last_login is None: + self.last_login = datetime.now() + + +class SessionManager: + """Manages user sessions and JWT tokens""" + + def __init__(self, secret_key: str): + self.secret_key = secret_key + self.users: Dict[str, User] = {} # user_id -> User + + async def get_user_info_from_token(self, access_token: str) -> Optional[Dict[str, Any]]: + """Get user info from Google using access token""" + try: + async with httpx.AsyncClient() as client: + response = await client.get( + "https://www.googleapis.com/oauth2/v2/userinfo", + headers={"Authorization": f"Bearer {access_token}"} + ) + + if response.status_code == 200: + return response.json() + else: + print(f"Failed to get user info: {response.status_code} {response.text}") + return None + + except Exception as e: + print(f"Error getting user info: {e}") + return None + + async def create_user_session(self, access_token: str) -> Optional[str]: + """Create user session from OAuth access token""" + user_info = await self.get_user_info_from_token(access_token) + if not user_info: + return None + + # Create or update user + user_id = user_info["id"] + user = User( + user_id=user_id, + email=user_info["email"], + name=user_info["name"], + picture=user_info.get("picture"), + provider="google" + ) + + # Update last login if user exists + if user_id in self.users: + self.users[user_id].last_login = datetime.now() + else: + self.users[user_id] = user + + # Create JWT token + token_payload = { + "user_id": user_id, + 
"email": user.email, + "name": user.name, + "exp": datetime.utcnow() + timedelta(days=7), # 7 day expiry + "iat": datetime.utcnow() + } + + token = jwt.encode(token_payload, self.secret_key, algorithm="HS256") + return token + + def verify_token(self, token: str) -> Optional[Dict[str, Any]]: + """Verify JWT token and return user info""" + try: + payload = jwt.decode(token, self.secret_key, algorithms=["HS256"]) + return payload + except jwt.ExpiredSignatureError: + return None + except jwt.InvalidTokenError: + return None + + def get_user(self, user_id: str) -> Optional[User]: + """Get user by ID""" + return self.users.get(user_id) + + def get_user_from_token(self, token: str) -> Optional[User]: + """Get user from JWT token""" + payload = self.verify_token(token) + if payload: + return self.get_user(payload["user_id"]) + return None \ No newline at end of file diff --git a/uv.lock b/uv.lock index 7750d8ff..fd7f0951 100644 --- a/uv.lock +++ b/uv.lock @@ -131,6 +131,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" }, ] +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, +] + [[package]] name = "certifi" version = "2025.7.9" @@ -425,7 +434,12 @@ dependencies = [ { name = "agentd" }, { name = "aiofiles" }, { name = "docling" }, + { name = "google-api-python-client" }, + { name = "google-auth-httplib2" }, + { name = "google-auth-oauthlib" }, + { name = "httpx" }, { name = "opensearch-py", extra = ["async"] }, + { name = "pyjwt" }, { name = "python-multipart" }, { name = "starlette" }, { name = "torch" }, @@ -437,13 +451,102 @@ requires-dist = [ { name = "agentd", specifier = ">=0.2.1" }, { name = "aiofiles", specifier = ">=24.1.0" }, { name = "docling", specifier = ">=2.41.0" }, + { name = "google-api-python-client", specifier = ">=2.143.0" }, + { name = "google-auth-httplib2", specifier = ">=0.2.0" }, + { name = "google-auth-oauthlib", specifier = ">=1.2.0" }, + { name = "httpx", specifier = ">=0.27.0" }, { name = "opensearch-py", extras = ["async"], specifier = ">=3.0.0" }, + { name = "pyjwt", specifier = ">=2.8.0" }, { name = "python-multipart", specifier = ">=0.0.20" }, { name = "starlette", specifier = ">=0.47.1" }, { name = "torch", specifier = ">=2.7.1", index = "https://download.pytorch.org/whl/cu128" }, { name = "uvicorn", specifier = ">=0.35.0" }, ] +[[package]] +name = "google-api-core" +version = "2.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/dc/21/e9d043e88222317afdbdb567165fdbc3b0aad90064c7e0c9eb0ad9955ad8/google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8", size = 165443, upload-time = "2025-06-12T20:52:20.439Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/4b/ead00905132820b623732b175d66354e9d3e69fcf2a5dcdab780664e7896/google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7", size = 160807, upload-time = "2025-06-12T20:52:19.334Z" }, +] + +[[package]] +name = "google-api-python-client" +version = "2.177.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-auth-httplib2" }, + { name = "httplib2" }, + { name = "uritemplate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/75/a89cad519fa8910132e3b08571d0e682ae1163643da6f963f1930f3dc788/google_api_python_client-2.177.0.tar.gz", hash = "sha256:9ffd2b57d68f5afa7e6ac64e2c440534eaa056cbb394812a62ff94723c31b50e", size = 13184405, upload-time = "2025-07-23T16:22:46.321Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/f5/121248e18ca605a11720c81ae1b52a5a8cb690af9f01887c56de23cd9a5a/google_api_python_client-2.177.0-py3-none-any.whl", hash = "sha256:f2f50f11105ab883eb9b6cf38ec54ea5fd4b429249f76444bec90deba5be79b3", size = 13709470, upload-time = "2025-07-23T16:22:44.081Z" }, +] + +[[package]] +name = "google-auth" +version = "2.40.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "pyasn1-modules" }, + { name = "rsa" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/9b/e92ef23b84fa10a64ce4831390b7a4c2e53c0132568d99d4ae61d04c8855/google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77", size = 281029, upload-time = "2025-06-04T18:04:57.577Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/63/b19553b658a1692443c62bd07e5868adaa0ad746a0751ba62c59568cd45b/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca", size = 216137, upload-time = "2025-06-04T18:04:55.573Z" }, +] + +[[package]] +name = "google-auth-httplib2" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "httplib2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, upload-time = "2023-12-12T17:40:30.722Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload-time = "2023-12-12T17:40:13.055Z" }, +] + +[[package]] +name = "google-auth-oauthlib" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "requests-oauthlib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = 
"sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.70.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/24/33db22342cf4a2ea27c9955e6713140fedd51e8b141b5ce5260897020f1a/googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257", size = 145903, upload-time = "2025-04-14T10:17:02.924Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z" }, +] + [[package]] name = "griffe" version = "1.7.3" @@ -493,6 +596,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] +[[package]] +name = "httplib2" +version = "0.22.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/ad/2371116b22d616c194aa25ec410c9c6c37f23599dcd590502b74db197584/httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81", size = 351116, upload-time = "2023-03-21T22:29:37.214Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload-time = "2023-03-21T22:29:35.683Z" }, +] + [[package]] name = "httpx" version = "0.28.1" @@ -1105,6 +1220,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/cd/0e8c51b2ae3a58f054f2e7fe91b82d201abfb30167f2431e9bd92d532f42/nvidia_nvtx_cu12-12.8.55-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dd0780f1a55c21d8e06a743de5bd95653de630decfff40621dbde78cc307102", size = 89896, upload-time = "2025-01-23T17:50:44.487Z" }, ] +[[package]] +name = "oauthlib" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, +] + [[package]] name = "openai" version = "1.95.0" @@ -1333,6 
@@ -1333,6 +1457,53 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" },
 ]
 
+[[package]]
+name = "proto-plus"
+version = "1.26.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f4/ac/87285f15f7cce6d4a008f33f1757fb5a13611ea8914eb58c3d0d26243468/proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012", size = 56142, upload-time = "2025-03-10T15:54:38.843Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4e/6d/280c4c2ce28b1593a19ad5239c8b826871fc6ec275c21afc8e1820108039/proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66", size = 50163, upload-time = "2025-03-10T15:54:37.335Z" },
+]
+
+[[package]]
+name = "protobuf"
+version = "6.31.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/f3/b9655a711b32c19720253f6f06326faf90580834e2e83f840472d752bc8b/protobuf-6.31.1.tar.gz", hash = "sha256:d8cac4c982f0b957a4dc73a80e2ea24fab08e679c0de9deb835f4a12d69aca9a", size = 441797, upload-time = "2025-05-28T19:25:54.947Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f3/6f/6ab8e4bf962fd5570d3deaa2d5c38f0a363f57b4501047b5ebeb83ab1125/protobuf-6.31.1-cp310-abi3-win32.whl", hash = "sha256:7fa17d5a29c2e04b7d90e5e32388b8bfd0e7107cd8e616feef7ed3fa6bdab5c9", size = 423603, upload-time = "2025-05-28T19:25:41.198Z" },
+    { url = "https://files.pythonhosted.org/packages/44/3a/b15c4347dd4bf3a1b0ee882f384623e2063bb5cf9fa9d57990a4f7df2fb6/protobuf-6.31.1-cp310-abi3-win_amd64.whl", hash = "sha256:426f59d2964864a1a366254fa703b8632dcec0790d8862d30034d8245e1cd447", size = 435283, upload-time = "2025-05-28T19:25:44.275Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/c9/b9689a2a250264a84e66c46d8862ba788ee7a641cdca39bccf64f59284b7/protobuf-6.31.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:6f1227473dc43d44ed644425268eb7c2e488ae245d51c6866d19fe158e207402", size = 425604, upload-time = "2025-05-28T19:25:45.702Z" },
+    { url = "https://files.pythonhosted.org/packages/76/a1/7a5a94032c83375e4fe7e7f56e3976ea6ac90c5e85fac8576409e25c39c3/protobuf-6.31.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:a40fc12b84c154884d7d4c4ebd675d5b3b5283e155f324049ae396b95ddebc39", size = 322115, upload-time = "2025-05-28T19:25:47.128Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/b1/b59d405d64d31999244643d88c45c8241c58f17cc887e73bcb90602327f8/protobuf-6.31.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4ee898bf66f7a8b0bd21bce523814e6fbd8c6add948045ce958b73af7e8878c6", size = 321070, upload-time = "2025-05-28T19:25:50.036Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/af/ab3c51ab7507a7325e98ffe691d9495ee3d3aa5f589afad65ec920d39821/protobuf-6.31.1-py3-none-any.whl", hash = "sha256:720a6c7e6b77288b85063569baae8536671b39f15cc22037ec7045658d80489e", size = 168724, upload-time = "2025-05-28T19:25:53.926Z" },
+]
+
"https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, +] + [[package]] name = "pyclipper" version = "1.3.0.post6" @@ -1413,12 +1584,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, +] + [[package]] name = "pylatexenc" version = "2.10" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597, upload-time = "2021-04-06T07:56:07.854Z" } +[[package]] +name = "pyparsing" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/22/f1129e69d94ffff626bdb5c835506b3a5b4f3d070f17ea295e12c2c6f60f/pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be", size = 1088608, upload-time = "2025-03-25T05:01:28.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" }, +] + [[package]] name = "pypdfium2" version = 
"4.30.1" @@ -1606,6 +1795,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" }, ] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, +] + [[package]] name = "rich" version = "14.0.0" @@ -1681,6 +1883,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/75/04/5302cea1aa26d886d34cadbf2dc77d90d7737e576c0065f357b96dc7a1a6/rpds_py-0.26.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f14440b9573a6f76b4ee4770c13f0b5921f71dde3b6fcb8dabbefd13b7fe05d7", size = 232821, upload-time = "2025-07-01T15:55:55.167Z" }, ] +[[package]] +name = "rsa" +version = "4.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, +] + [[package]] name = "rtree" version = "1.4.0" @@ -2123,6 +2337,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, ] +[[package]] +name = "uritemplate" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267, upload-time = "2025-06-02T15:12:06.318Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488, upload-time = "2025-06-02T15:12:03.405Z" }, +] + [[package]] name = "urllib3" version = "2.5.0"