openrag/src/api/connectors.py
estevez.sebastian@gmail.com 93b72a19be webhooks work
2025-07-31 00:42:52 -04:00

203 lines
No EOL
8.9 KiB
Python

from starlette.requests import Request
from starlette.responses import JSONResponse
async def connector_sync(request: Request, connector_service, session_manager):
"""Sync files from all active connections of a connector type"""
connector_type = request.path_params.get("connector_type", "google_drive")
data = await request.json()
max_files = data.get("max_files")
try:
print(f"[DEBUG] Starting connector sync for connector_type={connector_type}, max_files={max_files}")
user = request.state.user
print(f"[DEBUG] User: {user.user_id}")
# Get all active connections for this connector type and user
connections = await connector_service.connection_manager.list_connections(
user_id=user.user_id,
connector_type=connector_type
)
active_connections = [conn for conn in connections if conn.is_active]
if not active_connections:
return JSONResponse({"error": f"No active {connector_type} connections found"}, status_code=404)
# Start sync tasks for all active connections
task_ids = []
for connection in active_connections:
print(f"[DEBUG] About to call sync_connector_files for connection {connection.connection_id}")
task_id = await connector_service.sync_connector_files(connection.connection_id, user.user_id, max_files)
task_ids.append(task_id)
print(f"[DEBUG] Got task_id: {task_id}")
return JSONResponse({
"task_ids": task_ids,
"status": "sync_started",
"message": f"Started syncing files from {len(active_connections)} {connector_type} connection(s)",
"connections_synced": len(active_connections)
},
status_code=201
)
except Exception as e:
import sys
import traceback
error_msg = f"[ERROR] Connector sync failed: {str(e)}"
print(error_msg, file=sys.stderr, flush=True)
traceback.print_exc(file=sys.stderr)
sys.stderr.flush()
return JSONResponse({"error": f"Sync failed: {str(e)}"}, status_code=500)
async def connector_status(request: Request, connector_service, session_manager):
"""Get connector status for authenticated user"""
connector_type = request.path_params.get("connector_type", "google_drive")
user = request.state.user
# Get connections for this connector type and user
connections = await connector_service.connection_manager.list_connections(
user_id=user.user_id,
connector_type=connector_type
)
# Check if there are any active connections
active_connections = [conn for conn in connections if conn.is_active]
has_authenticated_connection = len(active_connections) > 0
return JSONResponse({
"connector_type": connector_type,
"authenticated": has_authenticated_connection,
"status": "connected" if has_authenticated_connection else "not_connected",
"connections": [
{
"connection_id": conn.connection_id,
"name": conn.name,
"is_active": conn.is_active,
"created_at": conn.created_at.isoformat(),
"last_sync": conn.last_sync.isoformat() if conn.last_sync else None
}
for conn in connections
]
})
async def connector_webhook(request: Request, connector_service, session_manager):
"""Handle webhook notifications from any connector type"""
connector_type = request.path_params.get("connector_type")
try:
# Get the raw payload and headers
payload = {}
headers = dict(request.headers)
if request.method == "POST":
content_type = headers.get('content-type', '').lower()
if 'application/json' in content_type:
payload = await request.json()
else:
# Some webhooks send form data or plain text
body = await request.body()
payload = {"raw_body": body.decode('utf-8') if body else ""}
else:
# GET webhooks use query params
payload = dict(request.query_params)
# Add headers to payload for connector processing
payload["_headers"] = headers
payload["_method"] = request.method
print(f"[WEBHOOK] {connector_type} notification received")
# Extract channel/subscription ID from headers (Google Drive specific)
channel_id = headers.get('x-goog-channel-id')
if not channel_id:
print(f"[WEBHOOK] No channel ID found in {connector_type} webhook")
return JSONResponse({"status": "ignored", "reason": "no_channel_id"})
# Find the specific connection for this webhook
connection = await connector_service.connection_manager.get_connection_by_webhook_id(channel_id)
if not connection or not connection.is_active:
print(f"[WEBHOOK] Unknown channel {channel_id} - attempting to cancel old subscription")
# Try to cancel this unknown subscription using any active connection of this connector type
try:
all_connections = await connector_service.connection_manager.list_connections(
connector_type=connector_type
)
active_connections = [c for c in all_connections if c.is_active]
if active_connections:
# Use the first active connection to cancel the unknown subscription
connector = await connector_service._get_connector(active_connections[0].connection_id)
if connector:
print(f"[WEBHOOK] Cancelling unknown subscription {channel_id}")
resource_id = headers.get('x-goog-resource-id')
await connector.cleanup_subscription(channel_id, resource_id)
print(f"[WEBHOOK] Successfully cancelled unknown subscription {channel_id}")
except Exception as e:
print(f"[WARNING] Failed to cancel unknown subscription {channel_id}: {e}")
return JSONResponse({"status": "cancelled_unknown", "channel_id": channel_id})
# Process webhook for the specific connection
results = []
try:
# Get the connector instance
connector = await connector_service._get_connector(connection.connection_id)
if not connector:
print(f"[WEBHOOK] Could not get connector for connection {connection.connection_id}")
return JSONResponse({"status": "error", "reason": "connector_not_found"})
# Let the connector handle the webhook and return affected file IDs
affected_files = await connector.handle_webhook(payload)
if affected_files:
print(f"[WEBHOOK] Connection {connection.connection_id}: {len(affected_files)} files affected")
# Trigger incremental sync for affected files
task_id = await connector_service.sync_specific_files(
connection.connection_id,
connection.user_id,
affected_files
)
result = {
"connection_id": connection.connection_id,
"task_id": task_id,
"affected_files": len(affected_files)
}
else:
# No specific files identified - just log the webhook
print(f"[WEBHOOK] Connection {connection.connection_id}: general change detected, no specific files to sync")
result = {
"connection_id": connection.connection_id,
"action": "logged_only",
"reason": "no_specific_files"
}
return JSONResponse({
"status": "processed",
"connector_type": connector_type,
"channel_id": channel_id,
**result
})
except Exception as e:
print(f"[ERROR] Failed to process webhook for connection {connection.connection_id}: {e}")
import traceback
traceback.print_exc()
return JSONResponse({
"status": "error",
"connector_type": connector_type,
"channel_id": channel_id,
"error": str(e)
}, status_code=500)
except Exception as e:
import traceback
print(f"[ERROR] Webhook processing failed: {str(e)}")
traceback.print_exc()
return JSONResponse({"error": f"Webhook processing failed: {str(e)}"}, status_code=500)