Updated all relative imports to use absolute import paths for consistency and improved maintainability. Also replaced bare except clauses with 'except Exception' for better error handling, and made minor code cleanups such as removing unused variables and improving import order. No functional changes were made.
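
# Illustration of the patterns described above (the "before" lines are hypothetical):
#     before: from ..base import BaseConnector            # relative import
#     after:  from connectors.base import BaseConnector   # absolute import
#
#     before: except:             # bare except also traps SystemExit/KeyboardInterrupt
#     after:  except Exception:   # catches only ordinary runtime errors
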
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import parse_qs, urlparse

import httpx

from connectors.base import BaseConnector, ConnectorDocument, DocumentACL
from connectors.sharepoint.oauth import SharePointOAuth

logger = logging.getLogger(__name__)


class SharePointConnector(BaseConnector):
    """SharePoint connector using MSAL-based OAuth for authentication."""

    # Required BaseConnector class attributes
    CLIENT_ID_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_ID"
    CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET"

    # Connector metadata
    CONNECTOR_NAME = "SharePoint"
    CONNECTOR_DESCRIPTION = "Add knowledge from SharePoint"
    CONNECTOR_ICON = "sharepoint"

    def __init__(self, config: Dict[str, Any]):
        logger.debug(
            f"SharePoint connector __init__ called with config type: {type(config)}"
        )
        logger.debug(f"SharePoint connector __init__ config value: {config}")

        # Ensure we always pass a valid config to the base class. The previous
        # unguarded super().__init__(config) call before this check was both
        # redundant (it was repeated below) and unsafe when config was None.
        if config is None:
            logger.debug("Config was None, using empty dict")
            config = {}

        try:
            logger.debug("Calling super().__init__")
            super().__init__(config)  # Safe now: config is a dict, never None
            logger.debug("super().__init__ completed successfully")
        except Exception as e:
            logger.error(f"super().__init__ failed: {e}")
            raise

        # Initialize with defaults that allow the connector to be listed
        self.client_id = None
        self.client_secret = None
        self.tenant_id = config.get("tenant_id", "common")
        self.sharepoint_url = config.get("sharepoint_url")
        self.redirect_uri = config.get("redirect_uri", "http://localhost")

        # Try to get credentials, but don't fail if they're missing:
        # the connector must remain listable without them.
        try:
            logger.debug("Attempting to get client_id")
            self.client_id = self.get_client_id()
            logger.debug(f"Got client_id: {self.client_id is not None}")
        except Exception as e:
            logger.debug(f"Failed to get client_id: {e}")

        try:
            logger.debug("Attempting to get client_secret")
            self.client_secret = self.get_client_secret()
            logger.debug(f"Got client_secret: {self.client_secret is not None}")
        except Exception as e:
            logger.debug(f"Failed to get client_secret: {e}")

        # Token file setup
        project_root = Path(__file__).resolve().parent.parent.parent.parent
        token_file = config.get("token_file") or str(
            project_root / "sharepoint_token.json"
        )
        Path(token_file).parent.mkdir(parents=True, exist_ok=True)

        # Only initialize OAuth if we have credentials
        if self.client_id and self.client_secret:
            connection_id = config.get("connection_id", "default")

            # Use token_file from config if provided, otherwise generate one
            if config.get("token_file"):
                oauth_token_file = config["token_file"]
            else:
                oauth_token_file = f"sharepoint_token_{connection_id}.json"

            # Both branches of the original conditional produced the same URL
            # ("common" is just another tenant segment), so one f-string suffices.
            authority = f"https://login.microsoftonline.com/{self.tenant_id}"

            self.oauth = SharePointOAuth(
                client_id=self.client_id,
                client_secret=self.client_secret,
                token_file=oauth_token_file,
                authority=authority,
            )
        else:
            self.oauth = None

        # Track subscription ID for webhooks
        self._subscription_id: Optional[str] = None

        # Graph API defaults, similar to the Google Drive connector's flags
        self._graph_api_version = "v1.0"
        self._default_params = {
            "$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
        }

        # Selective sync support (similar to Google Drive and OneDrive);
        # accept the same settings under any of their historical key names.
        self.cfg = type(
            "SharePointConfig",
            (),
            {
                "file_ids": config.get("file_ids")
                or config.get("selected_files")
                or config.get("selected_file_ids"),
                "folder_ids": config.get("folder_ids")
                or config.get("selected_folders")
                or config.get("selected_folder_ids"),
            },
        )()

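    # Illustrative config for this connector (keys as consumed in __init__;
    # the values are hypothetical):
    #     {
    #         "tenant_id": "contoso",                  # defaults to "common"
    #         "sharepoint_url": "https://contoso.sharepoint.com/sites/teamsite",
    #         "redirect_uri": "http://localhost",
    #         "connection_id": "default",
    #         "token_file": "sharepoint_token.json",   # optional override
    #         "file_ids": ["..."],                     # optional selective sync
    #         "folder_ids": ["..."],                   # optional selective sync
    #         "webhook_url": "https://example.invalid" # used by setup_subscription
    #     }
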
    @property
    def _graph_base_url(self) -> str:
        """Base URL for Microsoft Graph API calls."""
        return f"https://graph.microsoft.com/{self._graph_api_version}"

    def emit(self, doc: ConnectorDocument) -> None:
        """Emit a ConnectorDocument instance.

        This implementation only logs the emission; downstream consumers are
        expected to hook into or override it.
        """
        logger.debug(f"Emitting SharePoint document: {doc.id} ({doc.filename})")

    async def authenticate(self) -> bool:
        """Test authentication - BaseConnector interface"""
        logger.debug(
            f"SharePoint authenticate() called, oauth is None: {self.oauth is None}"
        )
        try:
            if not self.oauth:
                logger.debug("SharePoint authentication failed: OAuth not initialized")
                self._authenticated = False
                return False

            logger.debug("Loading SharePoint credentials...")
            # Try to load existing credentials first
            load_result = await self.oauth.load_credentials()
            logger.debug(f"Load credentials result: {load_result}")

            logger.debug("Checking SharePoint authentication status...")
            authenticated = await self.oauth.is_authenticated()
            logger.debug(f"SharePoint is_authenticated result: {authenticated}")

            self._authenticated = authenticated
            return authenticated
        except Exception as e:
            # logger.exception logs the traceback, replacing the previous
            # inline `import traceback; traceback.print_exc()`.
            logger.exception(f"SharePoint authentication failed: {e}")
            self._authenticated = False
            return False

    def get_auth_url(self) -> str:
        """Get OAuth authorization URL"""
        if not self.oauth:
            raise RuntimeError("SharePoint OAuth not initialized - missing credentials")
        return self.oauth.create_authorization_url(self.redirect_uri)

    async def handle_oauth_callback(self, auth_code: str) -> Dict[str, Any]:
        """Handle OAuth callback"""
        if not self.oauth:
            raise RuntimeError("SharePoint OAuth not initialized - missing credentials")
        try:
            success = await self.oauth.handle_authorization_callback(
                auth_code, self.redirect_uri
            )
            if success:
                self._authenticated = True
                return {"status": "success"}
            else:
                raise ValueError("OAuth callback failed")
        except Exception as e:
            logger.error(f"OAuth callback failed: {e}")
            raise

    def sync_once(self) -> None:
        """
        Perform a one-shot sync of SharePoint files and emit documents.

        This method mirrors the Google Drive connector's sync_once functionality.
        """
        import asyncio

        async def _async_sync():
            try:
                # Get list of files
                file_list = await self.list_files(max_files=1000)  # Adjust as needed
                files = file_list.get("files", [])

                for file_info in files:
                    try:
                        file_id = file_info.get("id")
                        if not file_id:
                            continue

                        # Get full document content
                        doc = await self.get_file_content(file_id)
                        self.emit(doc)

                    except Exception as e:
                        logger.error(
                            f"Failed to sync SharePoint file {file_info.get('name', 'unknown')}: {e}"
                        )
                        continue

            except Exception as e:
                logger.error(f"SharePoint sync_once failed: {e}")
                raise

        # Run the async sync. asyncio.run() exists on Python 3.7+, which this
        # module already assumes (httpx, f-strings), so the old pre-3.7
        # get_event_loop() fallback was dead code and has been dropped.
        asyncio.run(_async_sync())

    async def setup_subscription(self) -> str:
        """Set up real-time subscription for file changes - BaseConnector interface"""
        webhook_url = self.config.get("webhook_url")
        if not webhook_url:
            logger.warning(
                "No webhook URL configured, skipping SharePoint subscription setup"
            )
            return "no-webhook-configured"

        try:
            # Ensure we're authenticated
            if not await self.authenticate():
                raise RuntimeError(
                    "SharePoint authentication failed during subscription setup"
                )

            token = self.oauth.get_access_token()

            # Subscribe to the SharePoint site's default drive, falling back
            # to the signed-in user's OneDrive. Graph subscription resource
            # paths are given without a leading slash.
            site_info = self._parse_sharepoint_url()
            if site_info:
                resource = f"sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/root"
            else:
                resource = "me/drive/root"

            subscription_data = {
                "changeType": "created,updated,deleted",
                "notificationUrl": f"{webhook_url}/webhook/sharepoint",
                "resource": resource,
                "expirationDateTime": self._get_subscription_expiry(),
                "clientState": f"sharepoint_{self.tenant_id}",
            }

            headers = {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            }

            url = f"{self._graph_base_url}/subscriptions"

            async with httpx.AsyncClient() as client:
                response = await client.post(
                    url, json=subscription_data, headers=headers, timeout=30
                )
                response.raise_for_status()

                result = response.json()
                subscription_id = result.get("id")

                if subscription_id:
                    self._subscription_id = subscription_id
                    logger.info(f"SharePoint subscription created: {subscription_id}")
                    return subscription_id
                else:
                    raise ValueError("No subscription ID returned from Microsoft Graph")

        except Exception as e:
            logger.error(f"Failed to setup SharePoint subscription: {e}")
            raise

    def _get_subscription_expiry(self) -> str:
        """Get subscription expiry time (max 3 days for Graph API)"""
        from datetime import timedelta, timezone

        # datetime.utcnow() is deprecated; use an explicit UTC timestamp.
        expiry = datetime.now(timezone.utc) + timedelta(days=3)
        return expiry.strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    def _parse_sharepoint_url(self) -> Optional[Dict[str, str]]:
        """Parse SharePoint URL to extract site information for Graph API"""
        if not self.sharepoint_url:
            return None

        try:
            parsed = urlparse(self.sharepoint_url)
            # Extract hostname and site name from a URL like:
            # https://contoso.sharepoint.com/sites/teamsite
            host_name = parsed.netloc
            path_parts = parsed.path.strip("/").split("/")

            if len(path_parts) >= 2 and path_parts[0] == "sites":
                site_name = path_parts[1]
                return {"host_name": host_name, "site_name": site_name}
        except Exception as e:
            logger.warning(f"Could not parse SharePoint URL {self.sharepoint_url}: {e}")

        return None

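    # Illustrative parse performed above (hypothetical tenant):
    #     "https://contoso.sharepoint.com/sites/teamsite"
    #         -> {"host_name": "contoso.sharepoint.com", "site_name": "teamsite"}
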
    async def list_files(
        self,
        page_token: Optional[str] = None,
        max_files: Optional[int] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """List all files using Microsoft Graph API - BaseConnector interface"""
        try:
            # Ensure authentication
            if not await self.authenticate():
                raise RuntimeError(
                    "SharePoint authentication failed during file listing"
                )

            # If file_ids or folder_ids are specified in config, use selective sync
            if self.cfg.file_ids or self.cfg.folder_ids:
                return await self._list_selected_files()

            files = []
            max_files_value = max_files if max_files is not None else 100

            # Build Graph API URL for the site, or fall back to the user's OneDrive
            site_info = self._parse_sharepoint_url()
            if site_info:
                base_url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/root/children"
            else:
                base_url = f"{self._graph_base_url}/me/drive/root/children"

            params = dict(self._default_params)
            params["$top"] = str(max_files_value)

            if page_token:
                params["$skiptoken"] = page_token

            response = await self._make_graph_request(base_url, params=params)
            data = response.json()

            items = data.get("value", [])
            for item in items:
                # Only include files, not folders
                if item.get("file"):
                    files.append(
                        {
                            "id": item.get("id", ""),
                            "name": item.get("name", ""),
                            "path": f"/drive/items/{item.get('id')}",
                            "size": int(item.get("size", 0)),
                            "modified": item.get("lastModifiedDateTime"),
                            "created": item.get("createdDateTime"),
                            "mime_type": item.get("file", {}).get(
                                "mimeType", self._get_mime_type(item.get("name", ""))
                            ),
                            "url": item.get("webUrl", ""),
                            "download_url": item.get("@microsoft.graph.downloadUrl"),
                        }
                    )

            # Check for the next page: Graph returns an @odata.nextLink URL,
            # from which we extract the $skiptoken to use as our page token.
            # (urlparse and parse_qs come from the module-level import; the
            # previous inline import duplicated it.)
            next_page_token = None
            next_link = data.get("@odata.nextLink")
            if next_link:
                parsed = urlparse(next_link)
                query_params = parse_qs(parsed.query)
                if "$skiptoken" in query_params:
                    next_page_token = query_params["$skiptoken"][0]

            return {"files": files, "next_page_token": next_page_token}

        except Exception as e:
            logger.error(f"Failed to list SharePoint files: {e}")
            # Return an empty result instead of raising
            return {"files": [], "next_page_token": None}

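    # Hedged pagination example (URL shape illustrative, token value hypothetical):
    #     @odata.nextLink = "https://graph.microsoft.com/v1.0/me/drive/root/children?$skiptoken=abc123"
    #         -> next_page_token == "abc123", sent back as $skiptoken on the next call
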
    async def get_file_content(self, file_id: str) -> ConnectorDocument:
        """Get file content and metadata - BaseConnector interface"""
        try:
            # Ensure authentication
            if not await self.authenticate():
                raise RuntimeError(
                    "SharePoint authentication failed during file content retrieval"
                )

            # First get file metadata using Graph API
            file_metadata = await self._get_file_metadata_by_id(file_id)

            if not file_metadata:
                raise ValueError(f"File not found: {file_id}")

            # Download file content, preferring the pre-authenticated download URL
            download_url = file_metadata.get("download_url")
            if download_url:
                content = await self._download_file_from_url(download_url)
            else:
                content = await self._download_file_content(file_id)

            # Create ACL from metadata
            acl = DocumentACL(
                owner="",  # Graph API requires additional calls for detailed permissions
                user_permissions={},
                group_permissions={},
            )

            # Parse dates
            modified_time = self._parse_graph_date(file_metadata.get("modified"))
            created_time = self._parse_graph_date(file_metadata.get("created"))

            return ConnectorDocument(
                id=file_id,
                filename=file_metadata.get("name", ""),
                mimetype=file_metadata.get("mime_type", "application/octet-stream"),
                content=content,
                source_url=file_metadata.get("url", ""),
                acl=acl,
                modified_time=modified_time,
                created_time=created_time,
                metadata={
                    "sharepoint_path": file_metadata.get("path", ""),
                    "sharepoint_url": self.sharepoint_url,
                    "size": file_metadata.get("size", 0),
                },
            )

        except Exception as e:
            logger.error(f"Failed to get SharePoint file content {file_id}: {e}")
            raise

    async def _get_file_metadata_by_id(self, file_id: str) -> Optional[Dict[str, Any]]:
        """Get file metadata by ID using Graph API"""
        try:
            # Try the site-specific path first, then fall back to the user drive
            site_info = self._parse_sharepoint_url()
            if site_info:
                url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{file_id}"
            else:
                url = f"{self._graph_base_url}/me/drive/items/{file_id}"

            params = dict(self._default_params)

            response = await self._make_graph_request(url, params=params)
            item = response.json()

            if item.get("file"):
                return {
                    "id": file_id,
                    "name": item.get("name", ""),
                    "path": f"/drive/items/{file_id}",
                    "size": int(item.get("size", 0)),
                    "modified": item.get("lastModifiedDateTime"),
                    "created": item.get("createdDateTime"),
                    "mime_type": item.get("file", {}).get(
                        "mimeType", self._get_mime_type(item.get("name", ""))
                    ),
                    "url": item.get("webUrl", ""),
                    "download_url": item.get("@microsoft.graph.downloadUrl"),
                }

            # Check if it's a folder
            if item.get("folder"):
                return {
                    "id": file_id,
                    "name": item.get("name", ""),
                    "isFolder": True,
                }

            return None

        except Exception as e:
            logger.error(f"Failed to get file metadata for {file_id}: {e}")
            return None

    async def _download_file_content(self, file_id: str) -> bytes:
        """Download file content by file ID using Graph API"""
        try:
            site_info = self._parse_sharepoint_url()
            if site_info:
                url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{file_id}/content"
            else:
                url = f"{self._graph_base_url}/me/drive/items/{file_id}/content"

            token = self.oauth.get_access_token()
            headers = {"Authorization": f"Bearer {token}"}

            # The Graph /content endpoint answers with a 302 redirect to a
            # pre-authenticated URL; httpx does not follow redirects by
            # default, so enable it explicitly.
            async with httpx.AsyncClient(follow_redirects=True) as client:
                response = await client.get(url, headers=headers, timeout=60)
                response.raise_for_status()
                return response.content

        except Exception as e:
            logger.error(f"Failed to download file content for {file_id}: {e}")
            raise

    async def _list_selected_files(self) -> Dict[str, Any]:
        """List only selected files/folders (selective sync)."""
        files: List[Dict[str, Any]] = []

        # Process selected file IDs
        if self.cfg.file_ids:
            for file_id in self.cfg.file_ids:
                try:
                    file_meta = await self._get_file_metadata_by_id(file_id)
                    if file_meta and not file_meta.get("isFolder", False):
                        files.append(file_meta)
                    elif file_meta and file_meta.get("isFolder", False):
                        # If it's a folder, expand its contents
                        folder_files = await self._list_folder_contents(file_id)
                        files.extend(folder_files)
                except Exception as e:
                    logger.warning(f"Failed to get file {file_id}: {e}")
                    continue

        # Process selected folder IDs
        if self.cfg.folder_ids:
            for folder_id in self.cfg.folder_ids:
                try:
                    folder_files = await self._list_folder_contents(folder_id)
                    files.extend(folder_files)
                except Exception as e:
                    logger.warning(f"Failed to list folder {folder_id}: {e}")
                    continue

        return {"files": files, "next_page_token": None}

    async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
        """List all files in a folder recursively."""
        files: List[Dict[str, Any]] = []

        try:
            site_info = self._parse_sharepoint_url()
            if site_info:
                url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{folder_id}/children"
            else:
                url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"

            params = dict(self._default_params)

            response = await self._make_graph_request(url, params=params)
            data = response.json()

            items = data.get("value", [])
            for item in items:
                if item.get("file"):  # It's a file
                    file_meta = await self._get_file_metadata_by_id(item.get("id"))
                    if file_meta:
                        files.append(file_meta)
                elif item.get("folder"):  # It's a subfolder, recurse
                    subfolder_files = await self._list_folder_contents(item.get("id"))
                    files.extend(subfolder_files)
        except Exception as e:
            logger.error(f"Failed to list folder contents for {folder_id}: {e}")

        return files

    async def _download_file_from_url(self, download_url: str) -> bytes:
        """Download file content from a direct download URL"""
        try:
            # Pre-authenticated download URLs need no Authorization header
            async with httpx.AsyncClient() as client:
                response = await client.get(download_url, timeout=60)
                response.raise_for_status()
                return response.content
        except Exception as e:
            logger.error(f"Failed to download from URL {download_url}: {e}")
            raise

    def _parse_graph_date(self, date_str: Optional[str]) -> datetime:
        """Parse a Microsoft Graph date string into a naive datetime"""
        if not date_str:
            return datetime.now()

        try:
            # Graph timestamps are ISO 8601, usually with a trailing "Z";
            # strip it so fromisoformat() accepts the value on older Pythons.
            if date_str.endswith("Z"):
                return datetime.fromisoformat(date_str[:-1]).replace(tzinfo=None)
            # fromisoformat() handles the "T" separator directly; the old
            # replace("T", " ") was unnecessary.
            return datetime.fromisoformat(date_str)
        except (ValueError, AttributeError):
            return datetime.now()

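    # Illustrative conversion (timestamp value hypothetical):
    #     "2024-05-01T12:34:56Z" -> datetime(2024, 5, 1, 12, 34, 56)
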
    async def _make_graph_request(
        self,
        url: str,
        method: str = "GET",
        data: Optional[Dict] = None,
        params: Optional[Dict] = None,
    ) -> httpx.Response:
        """Make an authenticated API request to Microsoft Graph"""
        token = self.oauth.get_access_token()
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }

        async with httpx.AsyncClient() as client:
            if method.upper() == "GET":
                response = await client.get(
                    url, headers=headers, params=params, timeout=30
                )
            elif method.upper() == "POST":
                response = await client.post(
                    url, headers=headers, json=data, timeout=30
                )
            elif method.upper() == "DELETE":
                response = await client.delete(url, headers=headers, timeout=30)
            else:
                raise ValueError(f"Unsupported HTTP method: {method}")

            response.raise_for_status()
            return response

    def _get_mime_type(self, filename: str) -> str:
        """Get MIME type based on file extension"""
        import mimetypes

        mime_type, _ = mimetypes.guess_type(filename)
        return mime_type or "application/octet-stream"

    # Webhook methods - BaseConnector interface
    def handle_webhook_validation(
        self, request_method: str, headers: Dict[str, str], query_params: Dict[str, str]
    ) -> Optional[str]:
        """Handle webhook validation (Graph API specific)"""
        # Graph validates a new subscription by POSTing to the notification
        # URL with a validationToken query parameter that must be echoed back.
        if request_method == "POST" and "validationToken" in query_params:
            return query_params["validationToken"]
        return None

    def extract_webhook_channel_id(
        self, payload: Dict[str, Any], headers: Dict[str, str]
    ) -> Optional[str]:
        """Extract channel/subscription ID from webhook payload"""
        notifications = payload.get("value", [])
        if notifications:
            return notifications[0].get("subscriptionId")
        return None

    async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]:
        """Handle webhook notification and return affected file IDs"""
        affected_files = []

        # Process Microsoft Graph webhook payload
        notifications = payload.get("value", [])
        for notification in notifications:
            resource = notification.get("resource")
            if resource and "/drive/items/" in resource:
                file_id = resource.split("/drive/items/")[-1]
                affected_files.append(file_id)

        return affected_files

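    # Abridged sketch of the notification payload handled above (field values
    # hypothetical; real Graph notifications carry additional fields):
    #     {"value": [{"subscriptionId": "...",
    #                 "changeType": "updated",
    #                 "clientState": "sharepoint_common",
    #                 "resource": "me/drive/items/01ABCDEF"}]}
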
    async def cleanup_subscription(self, subscription_id: str) -> bool:
        """Clean up subscription - BaseConnector interface"""
        if subscription_id == "no-webhook-configured":
            logger.info("No subscription to clean up (webhook was not configured)")
            return True

        try:
            # Ensure authentication
            if not await self.authenticate():
                logger.error(
                    "SharePoint authentication failed during subscription cleanup"
                )
                return False

            token = self.oauth.get_access_token()
            headers = {"Authorization": f"Bearer {token}"}

            url = f"{self._graph_base_url}/subscriptions/{subscription_id}"

            async with httpx.AsyncClient() as client:
                response = await client.delete(url, headers=headers, timeout=30)

                # 404 is treated as success: the subscription is already gone
                if response.status_code in [200, 204, 404]:
                    logger.info(
                        f"SharePoint subscription {subscription_id} cleaned up successfully"
                    )
                    return True
                else:
                    logger.warning(
                        f"Unexpected response cleaning up subscription: {response.status_code}"
                    )
                    return False

        except Exception as e:
            logger.error(
                f"Failed to cleanup SharePoint subscription {subscription_id}: {e}"
            )
            return False
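
# Minimal usage sketch (illustrative only; assumes MICROSOFT_GRAPH_OAUTH_CLIENT_ID
# and MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET are set and that a token file already
# holds valid credentials -- the interactive OAuth flow is not shown here):
#
#     import asyncio
#
#     connector = SharePointConnector(
#         {"sharepoint_url": "https://contoso.sharepoint.com/sites/teamsite"}
#     )
#
#     async def main():
#         if await connector.authenticate():
#             listing = await connector.list_files(max_files=10)
#             for f in listing["files"]:
#                 print(f["name"], f["size"])
#
#     asyncio.run(main())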