Fix sharepoint and onedrive connectors

This commit is contained in:
Eric Hare 2025-11-19 12:39:39 -08:00
parent 1cfa72d20e
commit 0394df2052
No known key found for this signature in database
GPG key ID: A73DF73724270AB7
3 changed files with 193 additions and 14 deletions

View file

@ -196,20 +196,45 @@ export class OneDriveHandler {
}, },
success: (response: any) => { success: (response: any) => {
const newFiles: CloudFile[] = const newFiles: CloudFile[] =
response.value?.map((item: any, index: number) => ({ response.value?.map((item: any) => {
id: item.id, // Extract mimeType from file object or infer from name
name: let mimeType = item.file?.mimeType;
item.name || if (!mimeType && item.name) {
`${this.getProviderName()} File ${index + 1} (${item.id.slice( // Infer from extension if mimeType not provided
-8, const ext = item.name.split('.').pop()?.toLowerCase();
)})`, const mimeTypes: { [key: string]: string } = {
mimeType: item.file?.mimeType || "application/octet-stream", pdf: 'application/pdf',
webUrl: item.webUrl || "", doc: 'application/msword',
downloadUrl: item["@microsoft.graph.downloadUrl"] || "", docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
size: item.size, xls: 'application/vnd.ms-excel',
modifiedTime: item.lastModifiedDateTime, xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
isFolder: !!item.folder, ppt: 'application/vnd.ms-powerpoint',
})) || []; pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
txt: 'text/plain',
csv: 'text/csv',
json: 'application/json',
xml: 'application/xml',
html: 'text/html',
jpg: 'image/jpeg',
jpeg: 'image/jpeg',
png: 'image/png',
gif: 'image/gif',
svg: 'image/svg+xml',
};
mimeType = mimeTypes[ext || ''] || 'application/octet-stream';
}
return {
id: item.id,
name: item.name || `${this.getProviderName()} File`,
mimeType: mimeType || "application/octet-stream",
webUrl: item.webUrl || "",
downloadUrl: item["@microsoft.graph.downloadUrl"] || "",
size: item.size,
modifiedTime: item.lastModifiedDateTime,
isFolder: !!item.folder,
};
}) || [];
onFileSelected(newFiles); onFileSelected(newFiles);
}, },

View file

@ -95,6 +95,12 @@ class OneDriveConnector(BaseConnector):
self._default_params = { self._default_params = {
"$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl" "$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
} }
# Selective sync support (similar to Google Drive)
self.cfg = type('OneDriveConfig', (), {
'file_ids': config.get('file_ids') or config.get('selected_files') or config.get('selected_file_ids'),
'folder_ids': config.get('folder_ids') or config.get('selected_folders') or config.get('selected_folder_ids'),
})()
@property @property
def _graph_base_url(self) -> str: def _graph_base_url(self) -> str:
@ -251,6 +257,10 @@ class OneDriveConnector(BaseConnector):
if not await self.authenticate(): if not await self.authenticate():
raise RuntimeError("OneDrive authentication failed during file listing") raise RuntimeError("OneDrive authentication failed during file listing")
# If file_ids or folder_ids are specified in config, use selective sync
if self.cfg.file_ids or self.cfg.folder_ids:
return await self._list_selected_files()
files: List[Dict[str, Any]] = [] files: List[Dict[str, Any]] = []
max_files_value = max_files if max_files is not None else 100 max_files_value = max_files if max_files is not None else 100
@ -349,6 +359,14 @@ class OneDriveConnector(BaseConnector):
response = await self._make_graph_request(url, params=params) response = await self._make_graph_request(url, params=params)
item = response.json() item = response.json()
# Check if it's a folder
if item.get("folder"):
return {
"id": file_id,
"name": item.get("name", ""),
"isFolder": True,
}
if item.get("file"): if item.get("file"):
return { return {
"id": file_id, "id": file_id,
@ -360,6 +378,7 @@ class OneDriveConnector(BaseConnector):
"mime_type": item.get("file", {}).get("mimeType", self._get_mime_type(item.get("name", ""))), "mime_type": item.get("file", {}).get("mimeType", self._get_mime_type(item.get("name", ""))),
"url": item.get("webUrl", ""), "url": item.get("webUrl", ""),
"download_url": item.get("@microsoft.graph.downloadUrl"), "download_url": item.get("@microsoft.graph.downloadUrl"),
"isFolder": False,
} }
return None return None
@ -429,6 +448,62 @@ class OneDriveConnector(BaseConnector):
response.raise_for_status() response.raise_for_status()
return response return response
async def _list_selected_files(self) -> Dict[str, Any]:
"""List only selected files/folders (selective sync)."""
files: List[Dict[str, Any]] = []
# Process selected file IDs
if self.cfg.file_ids:
for file_id in self.cfg.file_ids:
try:
file_meta = await self._get_file_metadata_by_id(file_id)
if file_meta and not file_meta.get('isFolder', False):
files.append(file_meta)
elif file_meta and file_meta.get('isFolder', False):
# If it's a folder, expand its contents
folder_files = await self._list_folder_contents(file_id)
files.extend(folder_files)
except Exception as e:
logger.warning(f"Failed to get file {file_id}: {e}")
continue
# Process selected folder IDs
if self.cfg.folder_ids:
for folder_id in self.cfg.folder_ids:
try:
folder_files = await self._list_folder_contents(folder_id)
files.extend(folder_files)
except Exception as e:
logger.warning(f"Failed to list folder {folder_id}: {e}")
continue
return {"files": files, "next_page_token": None}
async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
"""List all files in a folder recursively."""
files: List[Dict[str, Any]] = []
try:
url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"
params = dict(self._default_params)
response = await self._make_graph_request(url, params=params)
data = response.json()
items = data.get("value", [])
for item in items:
if item.get("file"): # It's a file
file_meta = await self._get_file_metadata_by_id(item.get("id"))
if file_meta:
files.append(file_meta)
elif item.get("folder"): # It's a subfolder, recurse
subfolder_files = await self._list_folder_contents(item.get("id"))
files.extend(subfolder_files)
except Exception as e:
logger.error(f"Failed to list folder contents for {folder_id}: {e}")
return files
def _get_mime_type(self, filename: str) -> str: def _get_mime_type(self, filename: str) -> str:
"""Get MIME type based on file extension.""" """Get MIME type based on file extension."""
import mimetypes import mimetypes

View file

@ -100,6 +100,12 @@ class SharePointConnector(BaseConnector):
self._default_params = { self._default_params = {
"$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl" "$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
} }
# Selective sync support (similar to Google Drive and OneDrive)
self.cfg = type('SharePointConfig', (), {
'file_ids': config.get('file_ids') or config.get('selected_files') or config.get('selected_file_ids'),
'folder_ids': config.get('folder_ids') or config.get('selected_folders') or config.get('selected_folder_ids'),
})()
@property @property
def _graph_base_url(self) -> str: def _graph_base_url(self) -> str:
@ -293,6 +299,10 @@ class SharePointConnector(BaseConnector):
if not await self.authenticate(): if not await self.authenticate():
raise RuntimeError("SharePoint authentication failed during file listing") raise RuntimeError("SharePoint authentication failed during file listing")
# If file_ids or folder_ids are specified in config, use selective sync
if self.cfg.file_ids or self.cfg.folder_ids:
return await self._list_selected_files()
files = [] files = []
max_files_value = max_files if max_files is not None else 100 max_files_value = max_files if max_files is not None else 100
@ -426,6 +436,14 @@ class SharePointConnector(BaseConnector):
"download_url": item.get("@microsoft.graph.downloadUrl") "download_url": item.get("@microsoft.graph.downloadUrl")
} }
# Check if it's a folder
if item.get("folder"):
return {
"id": file_id,
"name": item.get("name", ""),
"isFolder": True,
}
return None return None
except Exception as e: except Exception as e:
@ -453,6 +471,67 @@ class SharePointConnector(BaseConnector):
logger.error(f"Failed to download file content for {file_id}: {e}") logger.error(f"Failed to download file content for {file_id}: {e}")
raise raise
async def _list_selected_files(self) -> Dict[str, Any]:
"""List only selected files/folders (selective sync)."""
files: List[Dict[str, Any]] = []
# Process selected file IDs
if self.cfg.file_ids:
for file_id in self.cfg.file_ids:
try:
file_meta = await self._get_file_metadata_by_id(file_id)
if file_meta and not file_meta.get('isFolder', False):
files.append(file_meta)
elif file_meta and file_meta.get('isFolder', False):
# If it's a folder, expand its contents
folder_files = await self._list_folder_contents(file_id)
files.extend(folder_files)
except Exception as e:
logger.warning(f"Failed to get file {file_id}: {e}")
continue
# Process selected folder IDs
if self.cfg.folder_ids:
for folder_id in self.cfg.folder_ids:
try:
folder_files = await self._list_folder_contents(folder_id)
files.extend(folder_files)
except Exception as e:
logger.warning(f"Failed to list folder {folder_id}: {e}")
continue
return {"files": files, "next_page_token": None}
async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
"""List all files in a folder recursively."""
files: List[Dict[str, Any]] = []
try:
site_info = self._parse_sharepoint_url()
if site_info:
url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{folder_id}/children"
else:
url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"
params = dict(self._default_params)
response = await self._make_graph_request(url, params=params)
data = response.json()
items = data.get("value", [])
for item in items:
if item.get("file"): # It's a file
file_meta = await self._get_file_metadata_by_id(item.get("id"))
if file_meta:
files.append(file_meta)
elif item.get("folder"): # It's a subfolder, recurse
subfolder_files = await self._list_folder_contents(item.get("id"))
files.extend(subfolder_files)
except Exception as e:
logger.error(f"Failed to list folder contents for {folder_id}: {e}")
return files
async def _download_file_from_url(self, download_url: str) -> bytes: async def _download_file_from_url(self, download_url: str) -> bytes:
"""Download file content from direct download URL""" """Download file content from direct download URL"""
try: try: