# File viewer header: 241 lines, 8.9 KiB, Python
import uuid
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Dict, List, Any, Optional
from urllib.parse import parse_qs, urlparse

import httpx

from ..base import BaseConnector, ConnectorDocument, DocumentACL
from .oauth import SharePointOAuth


class SharePointConnector(BaseConnector):
    """SharePoint Sites connector using Microsoft Graph API.

    Lists and downloads files from the drive of the SharePoint site given by
    ``config["site_id"]`` and manages a Graph change-notification
    subscription on that drive's root.
    """

    CLIENT_ID_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_ID"
    CLIENT_SECRET_ENV_VAR = "MICROSOFT_GRAPH_OAUTH_CLIENT_SECRET"

    # Connector metadata
    CONNECTOR_NAME = "SharePoint"
    CONNECTOR_DESCRIPTION = "Connect to SharePoint sites to sync team documents"
    CONNECTOR_ICON = "sharepoint"

    # Status codes treated as a redirect when downloading file content.
    _REDIRECT_STATUSES = (301, 302, 303, 307, 308)

    def __init__(self, config: Dict[str, Any]):
        """Build the connector from its configuration mapping.

        Keys read here: ``token_file``, ``subscription_id`` (falling back to
        ``webhook_channel_id``) and ``site_id``.
        """
        super().__init__(config)
        project_root = Path(__file__).resolve().parent.parent.parent.parent
        # NOTE(review): the default token file is named after OneDrive;
        # presumably the token is shared with the OneDrive connector - confirm.
        token_file = config.get("token_file") or str(
            project_root / "onedrive_token.json"
        )
        self.oauth = SharePointOAuth(
            client_id=self.get_client_id(),
            client_secret=self.get_client_secret(),
            token_file=token_file,
        )
        self.subscription_id = config.get("subscription_id") or config.get(
            "webhook_channel_id"
        )
        self.base_url = "https://graph.microsoft.com/v1.0"

        # SharePoint site configuration
        self.site_id = config.get("site_id")  # Required for SharePoint

    def _auth_headers(self) -> Dict[str, str]:
        """Return the bearer ``Authorization`` header for Graph requests."""
        return {"Authorization": f"Bearer {self.oauth.get_access_token()}"}

    @staticmethod
    def _parse_graph_timestamp(value: str) -> datetime:
        """Convert an ISO 8601 timestamp string into a naive UTC ``datetime``.

        A trailing ``Z`` is rewritten to ``+00:00`` so that
        ``datetime.fromisoformat`` accepts it.
        """
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        if parsed.tzinfo is not None:
            # Normalise to UTC before dropping the offset so that a non-UTC
            # offset is converted rather than silently discarded.
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    async def authenticate(self) -> bool:
        """Refresh and return the connector's authenticated flag.

        The flag is cleared as well as set, so a failed check does not leave
        a stale ``True`` from an earlier successful call.
        """
        self._authenticated = bool(await self.oauth.is_authenticated())
        return self._authenticated

    async def setup_subscription(self) -> str:
        """Create a Graph subscription on the site's drive root.

        Returns:
            The ``id`` of the created subscription, also stored on
            ``self.subscription_id``.

        Raises:
            ValueError: If the connector is not authenticated or
                ``webhook_url`` is missing from the config.

        Errors raised by ``raise_for_status()`` on the Graph response
        propagate to the caller.
        """
        if not self._authenticated:
            raise ValueError("Not authenticated")

        webhook_url = self.config.get("webhook_url")
        if not webhook_url:
            raise ValueError("webhook_url required in config for subscriptions")

        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the serialised form is unchanged (naive ISO 8601 plus a "Z" suffix).
        expires_at = datetime.now(timezone.utc) + timedelta(days=2)
        expiration = expires_at.replace(tzinfo=None).isoformat() + "Z"

        body = {
            # NOTE(review): Graph documentation indicates drive root
            # subscriptions support only the "updated" change type - confirm
            # that this combined value is accepted by the service.
            "changeType": "created,updated,deleted",
            "notificationUrl": webhook_url,
            "resource": f"/sites/{self.site_id}/drive/root",
            "expirationDateTime": expiration,
            # NOTE(review): the generated clientState is not stored, so it
            # cannot be compared against incoming notifications later.
            "clientState": str(uuid.uuid4()),
        }

        headers = self._auth_headers()
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                f"{self.base_url}/subscriptions",
                json=body,
                headers=headers,
            )
            resp.raise_for_status()
            data = resp.json()

        self.subscription_id = data["id"]
        return self.subscription_id

    async def list_files(
        self, page_token: Optional[str] = None, limit: int = 100
    ) -> Dict[str, Any]:
        """List one page of files directly under the site's drive root.

        Only items carrying a ``file`` facet are returned; other children
        (for example folders) are skipped and not descended into.

        Args:
            page_token: ``$skiptoken`` value from a previous call, if any.
            limit: Page size sent as ``$top``.

        Returns:
            ``{"files": [...], "nextPageToken": <str or None>}``.

        Raises:
            ValueError: If the connector is not authenticated.
        """
        if not self._authenticated:
            raise ValueError("Not authenticated")

        params = {"$top": str(limit)}
        if page_token:
            params["$skiptoken"] = page_token

        headers = self._auth_headers()
        async with httpx.AsyncClient() as client:
            resp = await client.get(
                f"{self.base_url}/sites/{self.site_id}/drive/root/children",
                params=params,
                headers=headers,
            )
            resp.raise_for_status()
            data = resp.json()

        files = []
        for item in data.get("value", []):
            file_facet = item.get("file")
            # Presence of the facet marks a file; an empty facet still counts
            # and falls back to the default mime type below.
            if file_facet is None:
                continue
            files.append(
                {
                    "id": item["id"],
                    "name": item["name"],
                    "mimeType": file_facet.get(
                        "mimeType", "application/octet-stream"
                    ),
                    "webViewLink": item.get("webUrl"),
                    "createdTime": item.get("createdDateTime"),
                    "modifiedTime": item.get("lastModifiedDateTime"),
                }
            )

        # The continuation token is the $skiptoken query parameter of the
        # next-page link, when the response carries one.
        next_token = None
        next_link = data.get("@odata.nextLink")
        if next_link:
            query = parse_qs(urlparse(next_link).query)
            next_token = query.get("$skiptoken", [None])[0]

        return {"files": files, "nextPageToken": next_token}

    async def get_file_content(self, file_id: str) -> ConnectorDocument:
        """Fetch metadata, raw content and permissions for one drive item.

        Args:
            file_id: Drive item id within the site's drive.

        Returns:
            A ``ConnectorDocument`` built from the item's metadata, its
            downloaded bytes and the ACL derived from its permissions.

        Raises:
            ValueError: If the connector is not authenticated.

        Errors raised by ``raise_for_status()`` on any of the requests
        propagate to the caller.
        """
        if not self._authenticated:
            raise ValueError("Not authenticated")

        headers = self._auth_headers()
        item_url = f"{self.base_url}/sites/{self.site_id}/drive/items/{file_id}"
        async with httpx.AsyncClient() as client:
            meta_resp = await client.get(item_url, headers=headers)
            meta_resp.raise_for_status()
            metadata = meta_resp.json()

            content_resp = await client.get(f"{item_url}/content", headers=headers)

            # Handle the possibility of this being a redirect: follow it by
            # hand, without forwarding the Authorization header.
            redirect_url = None
            if content_resp.status_code in self._REDIRECT_STATUSES:
                redirect_url = content_resp.headers.get("Location")
            if redirect_url:
                content_resp = await client.get(redirect_url)
            # Always validate the final response, so a redirect without a
            # Location header is checked too instead of its body being
            # returned as the file content.
            content_resp.raise_for_status()
            content = content_resp.content

            perm_resp = await client.get(
                f"{item_url}/permissions",
                headers=headers,
            )
            perm_resp.raise_for_status()
            permissions = perm_resp.json()

        acl = self._parse_permissions(metadata, permissions)
        modified = self._parse_graph_timestamp(metadata["lastModifiedDateTime"])
        created = self._parse_graph_timestamp(metadata["createdDateTime"])

        return ConnectorDocument(
            id=metadata["id"],
            filename=metadata["name"],
            mimetype=metadata.get("file", {}).get(
                "mimeType", "application/octet-stream"
            ),
            content=content,
            source_url=metadata.get("webUrl"),
            acl=acl,
            modified_time=modified,
            created_time=created,
            metadata={"size": metadata.get("size")},
        )

    def _parse_permissions(
        self, metadata: Dict[str, Any], permissions: Dict[str, Any]
    ) -> DocumentACL:
        """Build a ``DocumentACL`` from item metadata and a permissions payload.

        The owner is taken from ``createdBy.user.email`` when present. For
        each permission entry, the first role (``"read"`` when none is
        listed) is recorded against the grantee's user and/or group email.
        Entries without ``grantedToV2``/``grantedTo`` are ignored.
        """
        acl = DocumentACL()

        # Tolerate explicit nulls as well as missing keys on the way down.
        creator = (metadata.get("createdBy") or {}).get("user") or {}
        owner = creator.get("email")
        if owner:
            acl.owner = owner

        for perm in permissions.get("value", []):
            # An empty "roles" list falls back to "read" instead of raising
            # IndexError.
            roles = perm.get("roles") or ["read"]
            role = roles[0]

            grantee = perm.get("grantedToV2") or perm.get("grantedTo")
            if not grantee:
                continue

            user = grantee.get("user")
            if user and user.get("email"):
                acl.user_permissions[user["email"]] = role

            group = grantee.get("group")
            if group and group.get("email"):
                acl.group_permissions[group["email"]] = role

        return acl

    def handle_webhook_validation(
        self, request_method: str, headers: Dict[str, str], query_params: Dict[str, str]
    ) -> Optional[str]:
        """Handle Microsoft Graph webhook validation.

        Graph validates a notification URL by sending a request that carries
        a ``validationToken`` query parameter; per the Graph documentation
        that request is a POST, so both GET and POST are accepted here.

        Returns:
            The validation token to echo back, or ``None`` when the request
            is not a validation request.
        """
        if (request_method or "").upper() not in ("GET", "POST"):
            return None
        validation_token = query_params.get("validationtoken") or query_params.get(
            "validationToken"
        )
        return validation_token or None

    def extract_webhook_channel_id(
        self, payload: Dict[str, Any], headers: Dict[str, str]
    ) -> Optional[str]:
        """Extract SharePoint subscription ID from webhook payload.

        Returns the ``subscriptionId`` of the first entry in
        ``payload["value"]``, or ``None`` when there are no entries.
        """
        values = payload.get("value") or []
        return values[0].get("subscriptionId") if values else None

    async def handle_webhook(self, payload: Dict[str, Any]) -> List[str]:
        """Collect the ``resourceData.id`` of every notification in a payload.

        Notifications without ``resourceData`` or without an id are skipped.
        """
        file_ids = []
        for item in payload.get("value") or []:
            # resourceData may be absent or null; treat both as empty.
            file_id = (item.get("resourceData") or {}).get("id")
            if file_id:
                file_ids.append(file_id)
        return file_ids

    async def cleanup_subscription(
        self, subscription_id: str, resource_id: Optional[str] = None
    ) -> bool:
        """Delete a Graph subscription.

        Args:
            subscription_id: Id of the subscription to delete.
            resource_id: Unused by this connector.

        Returns:
            ``True`` when Graph answers 200 or 204; ``False`` otherwise or
            when the connector is not authenticated.
        """
        if not self._authenticated:
            return False

        headers = self._auth_headers()
        async with httpx.AsyncClient() as client:
            resp = await client.delete(
                f"{self.base_url}/subscriptions/{subscription_id}",
                headers=headers,
            )
            return resp.status_code in (200, 204)