import logging
from pathlib import Path
from typing import List, Dict, Any, Optional
from urllib.parse import urlparse
from datetime import datetime

import httpx

from ..base import BaseConnector, ConnectorDocument, DocumentACL
from .oauth import SharePointOAuth

logger = logging.getLogger(__name__)


class SharePointConnector(BaseConnector):
    """SharePoint connector using MSAL-based OAuth for authentication"""

    # Required BaseConnector class attributes
    CLIENT_ID_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_ID"
    CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET"  # pragma: allowlist secret

    # Connector metadata
    CONNECTOR_NAME = "SharePoint"
    CONNECTOR_DESCRIPTION = "Add knowledge from SharePoint"
    CONNECTOR_ICON = "sharepoint"
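
    # Credentials are expected in the environment under the *_ENV_VAR names
    # above (shell sketch; values are placeholders):
    #
    #     export MICROSOFT_GRAPH_OAUTH_CLIENT_ID=<app-client-id>
    #     export MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET=<app-client-secret>
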
    def __init__(self, config: Dict[str, Any]):
        logger.debug(f"SharePoint connector __init__ called with config type: {type(config)}")
        logger.debug(f"SharePoint connector __init__ config value: {config}")

        # Ensure we always pass a valid config to the base class
        if config is None:
            logger.debug("Config was None, using empty dict")
            config = {}

        try:
            logger.debug("Calling super().__init__")
            super().__init__(config)  # Safe to call: config is never None here
            logger.debug("super().__init__ completed successfully")
        except Exception as e:
            logger.error(f"super().__init__ failed: {e}")
            raise

        # Initialize with defaults that allow the connector to be listed
        self.client_id = None
        self.client_secret = None
        self.tenant_id = config.get("tenant_id", "common")
        self.sharepoint_url = config.get("sharepoint_url")
        self.redirect_uri = config.get("redirect_uri", "http://localhost")

        # Try to get credentials, but don't fail if they're missing -
        # missing credentials are OK for listing the connector.
        try:
            self.client_id = self.get_client_id()
            logger.debug(f"Got client_id: {self.client_id is not None}")
        except Exception as e:
            logger.debug(f"Failed to get client_id: {e}")

        try:
            self.client_secret = self.get_client_secret()
            logger.debug(f"Got client_secret: {self.client_secret is not None}")
        except Exception as e:
            logger.debug(f"Failed to get client_secret: {e}")

        # Token file setup - use data/ directory for persistence
        token_file = config.get("token_file") or "data/sharepoint_token.json"
        Path(token_file).parent.mkdir(parents=True, exist_ok=True)

        # Only initialize OAuth if we have credentials
        if self.client_id and self.client_secret:
            connection_id = config.get("connection_id", "default")

            # Use token_file from config if provided, otherwise generate one per connection
            if config.get("token_file"):
                oauth_token_file = config["token_file"]
            else:
                oauth_token_file = f"data/sharepoint_token_{connection_id}.json"

            # "common" is itself a valid tenant segment, so one f-string covers both cases
            authority = f"https://login.microsoftonline.com/{self.tenant_id}"

            self.oauth = SharePointOAuth(
                client_id=self.client_id,
                client_secret=self.client_secret,
                token_file=oauth_token_file,
                authority=authority
            )
        else:
            self.oauth = None

        # Track subscription ID for webhooks
        self._subscription_id: Optional[str] = None

        # Graph API defaults, similar to the Google Drive connector's flags
        self._graph_api_version = "v1.0"
        self._default_params = {
            "$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
        }

        # Selective sync support (similar to Google Drive and OneDrive)
        self.cfg = type('SharePointConfig', (), {
            'file_ids': config.get('file_ids') or config.get('selected_files') or config.get('selected_file_ids'),
            'folder_ids': config.get('folder_ids') or config.get('selected_folders') or config.get('selected_folder_ids'),
        })()
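
    # A minimal config sketch based on the keys this __init__ reads; values
    # are placeholders, not real endpoints or IDs:
    #
    #     {
    #         "tenant_id": "common",
    #         "sharepoint_url": "https://contoso.sharepoint.com/sites/teamsite",
    #         "redirect_uri": "http://localhost",
    #         "connection_id": "default",
    #         "token_file": "data/sharepoint_token.json",
    #         "webhook_url": "https://example.com",
    #         "folder_ids": ["<drive-item-id>"],
    #     }
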
    @property
    def _graph_base_url(self) -> str:
        """Base URL for Microsoft Graph API calls"""
        return f"https://graph.microsoft.com/{self._graph_api_version}"

    def emit(self, doc: ConnectorDocument) -> None:
        """Emit a ConnectorDocument instance."""
        logger.debug(f"Emitting SharePoint document: {doc.id} ({doc.filename})")

    async def authenticate(self) -> bool:
        """Test authentication - BaseConnector interface"""
        logger.debug(f"SharePoint authenticate() called, oauth is None: {self.oauth is None}")
        try:
            if not self.oauth:
                logger.debug("SharePoint authentication failed: OAuth not initialized")
                self._authenticated = False
                return False

            # Try to load existing credentials first
            load_result = await self.oauth.load_credentials()
            logger.debug(f"Load credentials result: {load_result}")

            authenticated = await self.oauth.is_authenticated()
            logger.debug(f"SharePoint is_authenticated result: {authenticated}")

            self._authenticated = authenticated
            return authenticated
        except Exception as e:
            # logger.exception logs the traceback instead of printing it to stderr
            logger.exception(f"SharePoint authentication failed: {e}")
            self._authenticated = False
            return False

    def get_auth_url(self) -> str:
        """Get OAuth authorization URL"""
        if not self.oauth:
            raise RuntimeError("SharePoint OAuth not initialized - missing credentials")
        return self.oauth.create_authorization_url(self.redirect_uri)

    async def handle_oauth_callback(self, auth_code: str) -> Dict[str, Any]:
        """Handle OAuth callback"""
        if not self.oauth:
            raise RuntimeError("SharePoint OAuth not initialized - missing credentials")
        try:
            success = await self.oauth.handle_authorization_callback(auth_code, self.redirect_uri)
            if success:
                self._authenticated = True
                return {"status": "success"}
            else:
                raise ValueError("OAuth callback failed")
        except Exception as e:
            logger.error(f"OAuth callback failed: {e}")
            raise

    def sync_once(self) -> None:
        """
        Perform a one-shot sync of SharePoint files and emit documents.
        This method mirrors the Google Drive connector's sync_once functionality.
        """
        import asyncio

        async def _async_sync():
            try:
                # Get list of files
                file_list = await self.list_files(max_files=1000)  # Adjust as needed
                files = file_list.get("files", [])

                for file_info in files:
                    try:
                        file_id = file_info.get("id")
                        if not file_id:
                            continue

                        # Get full document content and emit it
                        doc = await self.get_file_content(file_id)
                        self.emit(doc)

                    except Exception as e:
                        logger.error(f"Failed to sync SharePoint file {file_info.get('name', 'unknown')}: {e}")
                        continue

            except Exception as e:
                logger.error(f"SharePoint sync_once failed: {e}")
                raise

        # asyncio.run is available on every Python this module supports
        # (f-strings need 3.6+, httpx needs 3.8+), so no fallback is required
        asyncio.run(_async_sync())

    async def setup_subscription(self) -> str:
        """Set up real-time subscription for file changes - BaseConnector interface"""
        webhook_url = self.config.get('webhook_url')
        if not webhook_url:
            logger.warning("No webhook URL configured, skipping SharePoint subscription setup")
            return "no-webhook-configured"

        try:
            # Ensure we're authenticated
            if not await self.authenticate():
                raise RuntimeError("SharePoint authentication failed during subscription setup")

            token = self.oauth.get_access_token()

            # Microsoft Graph subscription for the SharePoint site's drive,
            # falling back to the signed-in user's OneDrive
            site_info = self._parse_sharepoint_url()
            if site_info:
                resource = f"sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/root"
            else:
                resource = "/me/drive/root"

            subscription_data = {
                # Graph drive (driveItem) subscriptions support only the
                # "updated" change type; creates and deletes surface as updates
                "changeType": "updated",
                "notificationUrl": f"{webhook_url}/webhook/sharepoint",
                "resource": resource,
                "expirationDateTime": self._get_subscription_expiry(),
                "clientState": f"sharepoint_{self.tenant_id}"
            }

            headers = {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            }

            url = f"{self._graph_base_url}/subscriptions"

            async with httpx.AsyncClient() as client:
                response = await client.post(url, json=subscription_data, headers=headers, timeout=30)
                response.raise_for_status()

            result = response.json()
            subscription_id = result.get("id")

            if subscription_id:
                self._subscription_id = subscription_id
                logger.info(f"SharePoint subscription created: {subscription_id}")
                return subscription_id
            else:
                raise ValueError("No subscription ID returned from Microsoft Graph")

        except Exception as e:
            logger.error(f"Failed to setup SharePoint subscription: {e}")
            raise

    def _get_subscription_expiry(self) -> str:
        """Get subscription expiry time, kept conservatively short for Graph API limits"""
        from datetime import datetime, timedelta, timezone
        # datetime.utcnow() is deprecated; use an aware UTC datetime instead
        expiry = datetime.now(timezone.utc) + timedelta(days=3)
        return expiry.strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    def _parse_sharepoint_url(self) -> Optional[Dict[str, str]]:
        """Parse SharePoint URL to extract site information for Graph API"""
        if not self.sharepoint_url:
            return None

        try:
            parsed = urlparse(self.sharepoint_url)
            # Extract hostname and site name from a URL like:
            # https://contoso.sharepoint.com/sites/teamsite
            host_name = parsed.netloc
            path_parts = parsed.path.strip('/').split('/')

            if len(path_parts) >= 2 and path_parts[0] == 'sites':
                site_name = path_parts[1]
                return {
                    "host_name": host_name,
                    "site_name": site_name
                }
        except Exception as e:
            logger.warning(f"Could not parse SharePoint URL {self.sharepoint_url}: {e}")

        return None

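    # Example of the parsing above, using the hypothetical URL from the comment:
    #
    #     "https://contoso.sharepoint.com/sites/teamsite"
    #     -> {"host_name": "contoso.sharepoint.com", "site_name": "teamsite"}
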
    async def list_files(
        self,
        page_token: Optional[str] = None,
        max_files: Optional[int] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """List all files using Microsoft Graph API - BaseConnector interface"""
        try:
            # Ensure authentication
            if not await self.authenticate():
                raise RuntimeError("SharePoint authentication failed during file listing")

            # If file_ids or folder_ids are specified in config, use selective sync
            if self.cfg.file_ids or self.cfg.folder_ids:
                return await self._list_selected_files()

            files = []
            max_files_value = max_files if max_files is not None else 100

            # Build Graph API URL for the site, or fall back to the user's OneDrive
            site_info = self._parse_sharepoint_url()
            if site_info:
                base_url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/root/children"
            else:
                base_url = f"{self._graph_base_url}/me/drive/root/children"

            params = dict(self._default_params)
            params["$top"] = str(max_files_value)

            if page_token:
                params["$skiptoken"] = page_token

            response = await self._make_graph_request(base_url, params=params)
            data = response.json()

            items = data.get("value", [])
            for item in items:
                # Only include files, not folders
                if item.get("file"):
                    files.append({
                        "id": item.get("id", ""),
                        "name": item.get("name", ""),
                        "path": f"/drive/items/{item.get('id')}",
                        "size": int(item.get("size", 0)),
                        "modified": item.get("lastModifiedDateTime"),
                        "created": item.get("createdDateTime"),
                        "mime_type": item.get("file", {}).get("mimeType", self._get_mime_type(item.get("name", ""))),
                        "url": item.get("webUrl", ""),
                        "download_url": item.get("@microsoft.graph.downloadUrl")
                    })

            # Check for a next page by pulling $skiptoken out of @odata.nextLink
            next_page_token = None
            next_link = data.get("@odata.nextLink")
            if next_link:
                from urllib.parse import parse_qs
                parsed = urlparse(next_link)
                query_params = parse_qs(parsed.query)
                if "$skiptoken" in query_params:
                    next_page_token = query_params["$skiptoken"][0]

            return {
                "files": files,
                "next_page_token": next_page_token
            }

        except Exception as e:
            logger.error(f"Failed to list SharePoint files: {e}")
            return {"files": [], "next_page_token": None}  # Return empty result instead of raising

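    # Pagination sketch (hypothetical caller code): feed next_page_token back
    # in until it comes back as None.
    #
    #     page_token = None
    #     while True:
    #         page = await connector.list_files(page_token=page_token, max_files=200)
    #         for f in page["files"]:
    #             ...
    #         page_token = page["next_page_token"]
    #         if not page_token:
    #             break
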
    async def get_file_content(self, file_id: str) -> ConnectorDocument:
        """Get file content and metadata - BaseConnector interface"""
        try:
            # Ensure authentication
            if not await self.authenticate():
                raise RuntimeError("SharePoint authentication failed during file content retrieval")

            # First get file metadata using Graph API
            file_metadata = await self._get_file_metadata_by_id(file_id)

            if not file_metadata:
                raise ValueError(f"File not found: {file_id}")

            # Download file content, preferring the pre-authenticated download URL
            download_url = file_metadata.get("download_url")
            if download_url:
                content = await self._download_file_from_url(download_url)
            else:
                content = await self._download_file_content(file_id)

            # Create ACL from metadata
            acl = DocumentACL(
                owner="",  # Graph API requires additional calls for detailed permissions
                user_permissions={},
                group_permissions={}
            )

            # Parse dates
            modified_time = self._parse_graph_date(file_metadata.get("modified"))
            created_time = self._parse_graph_date(file_metadata.get("created"))

            return ConnectorDocument(
                id=file_id,
                filename=file_metadata.get("name", ""),
                mimetype=file_metadata.get("mime_type", "application/octet-stream"),
                content=content,
                source_url=file_metadata.get("url", ""),
                acl=acl,
                modified_time=modified_time,
                created_time=created_time,
                metadata={
                    "sharepoint_path": file_metadata.get("path", ""),
                    "sharepoint_url": self.sharepoint_url,
                    "size": file_metadata.get("size", 0)
                }
            )

        except Exception as e:
            logger.error(f"Failed to get SharePoint file content {file_id}: {e}")
            raise

    async def _get_file_metadata_by_id(self, file_id: str) -> Optional[Dict[str, Any]]:
        """Get file metadata by ID using Graph API"""
        try:
            # Try the site-specific path first, falling back to the user drive
            site_info = self._parse_sharepoint_url()
            if site_info:
                url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{file_id}"
            else:
                url = f"{self._graph_base_url}/me/drive/items/{file_id}"

            params = dict(self._default_params)

            response = await self._make_graph_request(url, params=params)
            item = response.json()

            if item.get("file"):
                return {
                    "id": file_id,
                    "name": item.get("name", ""),
                    "path": f"/drive/items/{file_id}",
                    "size": int(item.get("size", 0)),
                    "modified": item.get("lastModifiedDateTime"),
                    "created": item.get("createdDateTime"),
                    "mime_type": item.get("file", {}).get("mimeType", self._get_mime_type(item.get("name", ""))),
                    "url": item.get("webUrl", ""),
                    "download_url": item.get("@microsoft.graph.downloadUrl")
                }

            # Check if it's a folder
            if item.get("folder"):
                return {
                    "id": file_id,
                    "name": item.get("name", ""),
                    "isFolder": True,
                }

            return None

        except Exception as e:
            logger.error(f"Failed to get file metadata for {file_id}: {e}")
            return None

    async def _download_file_content(self, file_id: str) -> bytes:
        """Download file content by file ID using Graph API"""
        try:
            site_info = self._parse_sharepoint_url()
            if site_info:
                url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{file_id}/content"
            else:
                url = f"{self._graph_base_url}/me/drive/items/{file_id}/content"

            token = self.oauth.get_access_token()
            headers = {"Authorization": f"Bearer {token}"}

            # The /content endpoint answers with a 302 to a pre-authenticated
            # URL; httpx does not follow redirects by default, so opt in here
            async with httpx.AsyncClient(follow_redirects=True) as client:
                response = await client.get(url, headers=headers, timeout=60)
                response.raise_for_status()
                return response.content

        except Exception as e:
            logger.error(f"Failed to download file content for {file_id}: {e}")
            raise

    async def _list_selected_files(self) -> Dict[str, Any]:
        """List only selected files/folders (selective sync)."""
        files: List[Dict[str, Any]] = []

        # Process selected file IDs
        if self.cfg.file_ids:
            for file_id in self.cfg.file_ids:
                try:
                    file_meta = await self._get_file_metadata_by_id(file_id)
                    if file_meta and not file_meta.get('isFolder', False):
                        files.append(file_meta)
                    elif file_meta and file_meta.get('isFolder', False):
                        # If it's a folder, expand its contents
                        folder_files = await self._list_folder_contents(file_id)
                        files.extend(folder_files)
                except Exception as e:
                    logger.warning(f"Failed to get file {file_id}: {e}")
                    continue

        # Process selected folder IDs
        if self.cfg.folder_ids:
            for folder_id in self.cfg.folder_ids:
                try:
                    folder_files = await self._list_folder_contents(folder_id)
                    files.extend(folder_files)
                except Exception as e:
                    logger.warning(f"Failed to list folder {folder_id}: {e}")
                    continue

        return {"files": files, "next_page_token": None}

    async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
        """List all files in a folder recursively."""
        files: List[Dict[str, Any]] = []

        try:
            site_info = self._parse_sharepoint_url()
            if site_info:
                url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{folder_id}/children"
            else:
                url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"

            params = dict(self._default_params)

            response = await self._make_graph_request(url, params=params)
            data = response.json()

            items = data.get("value", [])
            for item in items:
                if item.get("file"):  # It's a file
                    file_meta = await self._get_file_metadata_by_id(item.get("id"))
                    if file_meta:
                        files.append(file_meta)
                elif item.get("folder"):  # It's a subfolder, recurse
                    subfolder_files = await self._list_folder_contents(item.get("id"))
                    files.extend(subfolder_files)
        except Exception as e:
            logger.error(f"Failed to list folder contents for {folder_id}: {e}")

        return files

    async def _download_file_from_url(self, download_url: str) -> bytes:
        """Download file content from a direct (pre-authenticated) download URL"""
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(download_url, timeout=60)
                response.raise_for_status()
                return response.content
        except Exception as e:
            logger.error(f"Failed to download from URL {download_url}: {e}")
            raise

    def _parse_graph_date(self, date_str: Optional[str]) -> datetime:
        """Parse Microsoft Graph date string to a naive datetime"""
        if not date_str:
            return datetime.now()

        try:
            if date_str.endswith('Z'):
                # Strip the UTC suffix; fromisoformat then yields a naive datetime
                return datetime.fromisoformat(date_str[:-1])
            # fromisoformat accepts the 'T' separator directly
            return datetime.fromisoformat(date_str)
        except (ValueError, AttributeError):
            return datetime.now()

    async def _make_graph_request(self, url: str, method: str = "GET",
                                  data: Optional[Dict] = None, params: Optional[Dict] = None) -> httpx.Response:
        """Make an authenticated API request to Microsoft Graph"""
        token = self.oauth.get_access_token()
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }

        async with httpx.AsyncClient() as client:
            if method.upper() == "GET":
                response = await client.get(url, headers=headers, params=params, timeout=30)
            elif method.upper() == "POST":
                response = await client.post(url, headers=headers, json=data, timeout=30)
            elif method.upper() == "DELETE":
                response = await client.delete(url, headers=headers, timeout=30)
            else:
                raise ValueError(f"Unsupported HTTP method: {method}")

            response.raise_for_status()
            return response

    def _get_mime_type(self, filename: str) -> str:
        """Get MIME type based on file extension"""
        import mimetypes
        mime_type, _ = mimetypes.guess_type(filename)
        return mime_type or "application/octet-stream"

    # Webhook methods - BaseConnector interface
    def handle_webhook_validation(self, request_method: str, headers: Dict[str, str],
                                  query_params: Dict[str, str]) -> Optional[str]:
        """Handle webhook validation: Graph POSTs a validationToken that must be echoed back"""
        if request_method == "POST" and "validationToken" in query_params:
            return query_params["validationToken"]
        return None

    def extract_webhook_channel_id(self, payload: Dict[str, Any],
                                   headers: Dict[str, str]) -> Optional[str]:
        """Extract channel/subscription ID from webhook payload"""
        notifications = payload.get("value", [])
        if notifications:
            return notifications[0].get("subscriptionId")
        return None

    async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]:
        """Handle webhook notification and return affected file IDs"""
        affected_files = []

        # Process Microsoft Graph webhook payload
        notifications = payload.get("value", [])
        for notification in notifications:
            resource = notification.get("resource")
            if resource and "/drive/items/" in resource:
                file_id = resource.split("/drive/items/")[-1]
                affected_files.append(file_id)

        return affected_files

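    # Sketch of the Graph change-notification payload consumed above; only the
    # fields this connector reads are shown, and values are placeholders:
    #
    #     {
    #         "value": [
    #             {
    #                 "subscriptionId": "<subscription-id>",
    #                 "resource": "me/drive/items/<item-id>",
    #                 "changeType": "updated"
    #             }
    #         ]
    #     }
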
    async def cleanup_subscription(self, subscription_id: str) -> bool:
        """Clean up subscription - BaseConnector interface"""
        if subscription_id == "no-webhook-configured":
            logger.info("No subscription to clean up (webhook was not configured)")
            return True

        try:
            # Ensure authentication
            if not await self.authenticate():
                logger.error("SharePoint authentication failed during subscription cleanup")
                return False

            token = self.oauth.get_access_token()
            headers = {"Authorization": f"Bearer {token}"}

            url = f"{self._graph_base_url}/subscriptions/{subscription_id}"

            async with httpx.AsyncClient() as client:
                response = await client.delete(url, headers=headers, timeout=30)

            # 404 means the subscription is already gone, which is fine
            if response.status_code in [200, 204, 404]:
                logger.info(f"SharePoint subscription {subscription_id} cleaned up successfully")
                return True
            else:
                logger.warning(f"Unexpected response cleaning up subscription: {response.status_code}")
                return False

        except Exception as e:
            logger.error(f"Failed to cleanup SharePoint subscription {subscription_id}: {e}")
            return False
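

# A minimal end-to-end sketch, not part of the connector. It assumes the OAuth
# environment variables above are set and that the interactive OAuth flow is
# completed on first run; the site URL is a placeholder. Because this module
# uses relative imports, run it via the package (python -m ...), not directly.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        connector = SharePointConnector({
            "sharepoint_url": "https://contoso.sharepoint.com/sites/teamsite",
        })

        if not await connector.authenticate():
            # First run: complete the OAuth flow by hand
            print("Visit:", connector.get_auth_url())
            auth_code = input("Paste auth code: ").strip()
            await connector.handle_oauth_callback(auth_code)

        page = await connector.list_files(max_files=10)
        for f in page["files"]:
            doc = await connector.get_file_content(f["id"])
            print(doc.filename, len(doc.content), "bytes")

    asyncio.run(_demo())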