From 0394df2052f783e3196640fcb0ee4c301b7ce9f6 Mon Sep 17 00:00:00 2001
From: Eric Hare
Date: Wed, 19 Nov 2025 12:39:39 -0800
Subject: [PATCH] Fix sharepoint and onedrive connectors

---
 .../cloud-picker/provider-handlers.ts  | 53 +++++++++----
 src/connectors/onedrive/connector.py   | 75 ++++++++++++++++++
 src/connectors/sharepoint/connector.py | 79 +++++++++++++++++++
 3 files changed, 193 insertions(+), 14 deletions(-)

diff --git a/frontend/components/cloud-picker/provider-handlers.ts b/frontend/components/cloud-picker/provider-handlers.ts
index ec0b516c..8394d4ba 100644
--- a/frontend/components/cloud-picker/provider-handlers.ts
+++ b/frontend/components/cloud-picker/provider-handlers.ts
@@ -196,20 +196,45 @@ export class OneDriveHandler {
         },
         success: (response: any) => {
           const newFiles: CloudFile[] =
-            response.value?.map((item: any, index: number) => ({
-              id: item.id,
-              name:
-                item.name ||
-                `${this.getProviderName()} File ${index + 1} (${item.id.slice(
-                  -8,
-                )})`,
-              mimeType: item.file?.mimeType || "application/octet-stream",
-              webUrl: item.webUrl || "",
-              downloadUrl: item["@microsoft.graph.downloadUrl"] || "",
-              size: item.size,
-              modifiedTime: item.lastModifiedDateTime,
-              isFolder: !!item.folder,
-            })) || [];
+            response.value?.map((item: any) => {
+              // Extract mimeType from file object or infer from name
+              let mimeType = item.file?.mimeType;
+              if (!mimeType && item.name) {
+                // Infer from extension if mimeType not provided
+                const ext = item.name.split('.').pop()?.toLowerCase();
+                const mimeTypes: { [key: string]: string } = {
+                  pdf: 'application/pdf',
+                  doc: 'application/msword',
+                  docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+                  xls: 'application/vnd.ms-excel',
+                  xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+                  ppt: 'application/vnd.ms-powerpoint',
+                  pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+                  txt: 'text/plain',
+                  csv: 'text/csv',
+                  json: 'application/json',
+                  xml: 'application/xml',
+                  html: 'text/html',
+                  jpg: 'image/jpeg',
+                  jpeg: 'image/jpeg',
+                  png: 'image/png',
+                  gif: 'image/gif',
+                  svg: 'image/svg+xml',
+                };
+                mimeType = mimeTypes[ext || ''] || 'application/octet-stream';
+              }
+
+              return {
+                id: item.id,
+                name: item.name || `${this.getProviderName()} File`,
+                mimeType: mimeType || "application/octet-stream",
+                webUrl: item.webUrl || "",
+                downloadUrl: item["@microsoft.graph.downloadUrl"] || "",
+                size: item.size,
+                modifiedTime: item.lastModifiedDateTime,
+                isFolder: !!item.folder,
+              };
+            }) || [];
           onFileSelected(newFiles);
         },
diff --git a/src/connectors/onedrive/connector.py b/src/connectors/onedrive/connector.py
index e61cff46..a88321d3 100644
--- a/src/connectors/onedrive/connector.py
+++ b/src/connectors/onedrive/connector.py
@@ -95,6 +95,12 @@ class OneDriveConnector(BaseConnector):
         self._default_params = {
             "$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
         }
+
+        # Selective sync support (similar to Google Drive)
+        self.cfg = type('OneDriveConfig', (), {
+            'file_ids': config.get('file_ids') or config.get('selected_files') or config.get('selected_file_ids'),
+            'folder_ids': config.get('folder_ids') or config.get('selected_folders') or config.get('selected_folder_ids'),
+        })()

     @property
     def _graph_base_url(self) -> str:
@@ -251,6 +257,10 @@ class OneDriveConnector(BaseConnector):
         if not await self.authenticate():
             raise RuntimeError("OneDrive authentication failed during file listing")

+        # If file_ids or folder_ids are specified in config, use selective sync
+        if self.cfg.file_ids or self.cfg.folder_ids:
+            return await self._list_selected_files()
+
         files: List[Dict[str, Any]] = []
         max_files_value = max_files if max_files is not None else 100
@@ -349,6 +359,14 @@ class OneDriveConnector(BaseConnector):
             response = await self._make_graph_request(url, params=params)
             item = response.json()

+            # Check if it's a folder
+            if item.get("folder"):
+                return {
+                    "id": file_id,
+                    "name": item.get("name", ""),
+                    "isFolder": True,
+                }
+
             if item.get("file"):
                 return {
                     "id": file_id,
@@ -360,6 +378,7 @@ class OneDriveConnector(BaseConnector):
                     "mime_type": item.get("file", {}).get("mimeType", self._get_mime_type(item.get("name", ""))),
                     "url": item.get("webUrl", ""),
                     "download_url": item.get("@microsoft.graph.downloadUrl"),
+                    "isFolder": False,
                 }

             return None
@@ -429,6 +448,62 @@ class OneDriveConnector(BaseConnector):
         response.raise_for_status()
         return response

+    async def _list_selected_files(self) -> Dict[str, Any]:
+        """List only selected files/folders (selective sync)."""
+        files: List[Dict[str, Any]] = []
+
+        # Process selected file IDs
+        if self.cfg.file_ids:
+            for file_id in self.cfg.file_ids:
+                try:
+                    file_meta = await self._get_file_metadata_by_id(file_id)
+                    if file_meta and not file_meta.get('isFolder', False):
+                        files.append(file_meta)
+                    elif file_meta and file_meta.get('isFolder', False):
+                        # If it's a folder, expand its contents
+                        folder_files = await self._list_folder_contents(file_id)
+                        files.extend(folder_files)
+                except Exception as e:
+                    logger.warning(f"Failed to get file {file_id}: {e}")
+                    continue
+
+        # Process selected folder IDs
+        if self.cfg.folder_ids:
+            for folder_id in self.cfg.folder_ids:
+                try:
+                    folder_files = await self._list_folder_contents(folder_id)
+                    files.extend(folder_files)
+                except Exception as e:
+                    logger.warning(f"Failed to list folder {folder_id}: {e}")
+                    continue
+
+        return {"files": files, "next_page_token": None}
+
+    async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
+        """List all files in a folder recursively."""
+        files: List[Dict[str, Any]] = []
+
+        try:
+            url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"
+            params = dict(self._default_params)
+
+            response = await self._make_graph_request(url, params=params)
+            data = response.json()
+
+            items = data.get("value", [])
+            for item in items:
+                if item.get("file"):  # It's a file
+                    file_meta = await self._get_file_metadata_by_id(item.get("id"))
+                    if file_meta:
+                        files.append(file_meta)
+                elif item.get("folder"):  # It's a subfolder, recurse
+                    subfolder_files = await self._list_folder_contents(item.get("id"))
+                    files.extend(subfolder_files)
+        except Exception as e:
+            logger.error(f"Failed to list folder contents for {folder_id}: {e}")
+
+        return files
+
     def _get_mime_type(self, filename: str) -> str:
         """Get MIME type based on file extension."""
         import mimetypes
diff --git a/src/connectors/sharepoint/connector.py b/src/connectors/sharepoint/connector.py
index 11313ecd..f84d3575 100644
--- a/src/connectors/sharepoint/connector.py
+++ b/src/connectors/sharepoint/connector.py
@@ -100,6 +100,12 @@ class SharePointConnector(BaseConnector):
         self._default_params = {
             "$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
         }
+
+        # Selective sync support (similar to Google Drive and OneDrive)
+        self.cfg = type('SharePointConfig', (), {
+            'file_ids': config.get('file_ids') or config.get('selected_files') or config.get('selected_file_ids'),
+            'folder_ids': config.get('folder_ids') or config.get('selected_folders') or config.get('selected_folder_ids'),
+        })()

     @property
     def _graph_base_url(self) -> str:
@@ -293,6 +299,10 @@ class SharePointConnector(BaseConnector):
         if not await self.authenticate():
             raise RuntimeError("SharePoint authentication failed during file listing")

+        # If file_ids or folder_ids are specified in config, use selective sync
+        if self.cfg.file_ids or self.cfg.folder_ids:
+            return await self._list_selected_files()
+
         files = []
         max_files_value = max_files if max_files is not None else 100
@@ -426,6 +436,14 @@ class SharePointConnector(BaseConnector):
                     "download_url": item.get("@microsoft.graph.downloadUrl")
                 }

+            # Check if it's a folder
+            if item.get("folder"):
+                return {
+                    "id": file_id,
+                    "name": item.get("name", ""),
+                    "isFolder": True,
+                }
+
             return None

         except Exception as e:
@@ -453,6 +471,67 @@ class SharePointConnector(BaseConnector):
             logger.error(f"Failed to download file content for {file_id}: {e}")
             raise

+    async def _list_selected_files(self) -> Dict[str, Any]:
+        """List only selected files/folders (selective sync)."""
+        files: List[Dict[str, Any]] = []
+
+        # Process selected file IDs
+        if self.cfg.file_ids:
+            for file_id in self.cfg.file_ids:
+                try:
+                    file_meta = await self._get_file_metadata_by_id(file_id)
+                    if file_meta and not file_meta.get('isFolder', False):
+                        files.append(file_meta)
+                    elif file_meta and file_meta.get('isFolder', False):
+                        # If it's a folder, expand its contents
+                        folder_files = await self._list_folder_contents(file_id)
+                        files.extend(folder_files)
+                except Exception as e:
+                    logger.warning(f"Failed to get file {file_id}: {e}")
+                    continue
+
+        # Process selected folder IDs
+        if self.cfg.folder_ids:
+            for folder_id in self.cfg.folder_ids:
+                try:
+                    folder_files = await self._list_folder_contents(folder_id)
+                    files.extend(folder_files)
+                except Exception as e:
+                    logger.warning(f"Failed to list folder {folder_id}: {e}")
+                    continue
+
+        return {"files": files, "next_page_token": None}
+
+    async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
+        """List all files in a folder recursively."""
+        files: List[Dict[str, Any]] = []
+
+        try:
+            site_info = self._parse_sharepoint_url()
+            if site_info:
+                url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{folder_id}/children"
+            else:
+                url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"
+
+            params = dict(self._default_params)
+
+            response = await self._make_graph_request(url, params=params)
+            data = response.json()
+
+            items = data.get("value", [])
+            for item in items:
+                if item.get("file"):  # It's a file
+                    file_meta = await self._get_file_metadata_by_id(item.get("id"))
+                    if file_meta:
+                        files.append(file_meta)
+                elif item.get("folder"):  # It's a subfolder, recurse
+                    subfolder_files = await self._list_folder_contents(item.get("id"))
+                    files.extend(subfolder_files)
+        except Exception as e:
+            logger.error(f"Failed to list folder contents for {folder_id}: {e}")
+
+        return files
+
     async def _download_file_from_url(self, download_url: str) -> bytes:
         """Download file content from direct download URL"""
         try:
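
Usage sketch (illustrative, not part of the diff above): how the new selective-sync configuration might be passed to one of the patched connectors. Only the config key aliases ('file_ids'/'selected_files'/'selected_file_ids' and 'folder_ids'/'selected_folders'/'selected_folder_ids') and the {"files": [...], "next_page_token": None} result shape come from this patch; the import path, constructor call, list_files() method name, and the item IDs are assumptions for illustration.

    # Hypothetical sketch -- connector construction and the listing method name are
    # assumed; only the selective-sync config keys come from the patch above.
    import asyncio

    from src.connectors.onedrive.connector import OneDriveConnector  # assumed import path

    async def main() -> None:
        config = {
            # Any one alias per setting is read in __init__ ('file_ids' is tried first).
            "file_ids": ["01EXAMPLEITEMID"],      # hypothetical Graph drive-item IDs
            "folder_ids": ["01EXAMPLEFOLDERID"],  # folders are expanded recursively
            # ...plus whatever auth fields the connector already expects
        }
        connector = OneDriveConnector(config)  # assumed constructor signature
        # Assumed public listing method; when IDs are set it delegates to _list_selected_files()
        result = await connector.list_files()
        for f in result["files"]:
            print(f["id"], f.get("name"))

    asyncio.run(main())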