Fix SharePoint and OneDrive connectors
This commit is contained in:
parent
1cfa72d20e
commit
0394df2052
3 changed files with 193 additions and 14 deletions
|
|
@ -196,20 +196,45 @@ export class OneDriveHandler {
|
||||||
},
|
},
|
||||||
success: (response: any) => {
|
success: (response: any) => {
|
||||||
const newFiles: CloudFile[] =
|
const newFiles: CloudFile[] =
|
||||||
response.value?.map((item: any, index: number) => ({
|
response.value?.map((item: any) => {
|
||||||
id: item.id,
|
// Extract mimeType from file object or infer from name
|
||||||
name:
|
let mimeType = item.file?.mimeType;
|
||||||
item.name ||
|
if (!mimeType && item.name) {
|
||||||
`${this.getProviderName()} File ${index + 1} (${item.id.slice(
|
// Infer from extension if mimeType not provided
|
||||||
-8,
|
const ext = item.name.split('.').pop()?.toLowerCase();
|
||||||
)})`,
|
const mimeTypes: { [key: string]: string } = {
|
||||||
mimeType: item.file?.mimeType || "application/octet-stream",
|
pdf: 'application/pdf',
|
||||||
webUrl: item.webUrl || "",
|
doc: 'application/msword',
|
||||||
downloadUrl: item["@microsoft.graph.downloadUrl"] || "",
|
docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||||
size: item.size,
|
xls: 'application/vnd.ms-excel',
|
||||||
modifiedTime: item.lastModifiedDateTime,
|
xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||||
isFolder: !!item.folder,
|
ppt: 'application/vnd.ms-powerpoint',
|
||||||
})) || [];
|
pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||||
|
txt: 'text/plain',
|
||||||
|
csv: 'text/csv',
|
||||||
|
json: 'application/json',
|
||||||
|
xml: 'application/xml',
|
||||||
|
html: 'text/html',
|
||||||
|
jpg: 'image/jpeg',
|
||||||
|
jpeg: 'image/jpeg',
|
||||||
|
png: 'image/png',
|
||||||
|
gif: 'image/gif',
|
||||||
|
svg: 'image/svg+xml',
|
||||||
|
};
|
||||||
|
mimeType = mimeTypes[ext || ''] || 'application/octet-stream';
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
id: item.id,
|
||||||
|
name: item.name || `${this.getProviderName()} File`,
|
||||||
|
mimeType: mimeType || "application/octet-stream",
|
||||||
|
webUrl: item.webUrl || "",
|
||||||
|
downloadUrl: item["@microsoft.graph.downloadUrl"] || "",
|
||||||
|
size: item.size,
|
||||||
|
modifiedTime: item.lastModifiedDateTime,
|
||||||
|
isFolder: !!item.folder,
|
||||||
|
};
|
||||||
|
}) || [];
|
||||||
|
|
||||||
onFileSelected(newFiles);
|
onFileSelected(newFiles);
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -96,6 +96,12 @@ class OneDriveConnector(BaseConnector):
|
||||||
"$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
|
"$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Selective sync support (similar to Google Drive)
|
||||||
|
self.cfg = type('OneDriveConfig', (), {
|
||||||
|
'file_ids': config.get('file_ids') or config.get('selected_files') or config.get('selected_file_ids'),
|
||||||
|
'folder_ids': config.get('folder_ids') or config.get('selected_folders') or config.get('selected_folder_ids'),
|
||||||
|
})()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _graph_base_url(self) -> str:
|
def _graph_base_url(self) -> str:
|
||||||
"""Base URL for Microsoft Graph API calls."""
|
"""Base URL for Microsoft Graph API calls."""
|
||||||
|
|
@ -251,6 +257,10 @@ class OneDriveConnector(BaseConnector):
|
||||||
if not await self.authenticate():
|
if not await self.authenticate():
|
||||||
raise RuntimeError("OneDrive authentication failed during file listing")
|
raise RuntimeError("OneDrive authentication failed during file listing")
|
||||||
|
|
||||||
|
# If file_ids or folder_ids are specified in config, use selective sync
|
||||||
|
if self.cfg.file_ids or self.cfg.folder_ids:
|
||||||
|
return await self._list_selected_files()
|
||||||
|
|
||||||
files: List[Dict[str, Any]] = []
|
files: List[Dict[str, Any]] = []
|
||||||
max_files_value = max_files if max_files is not None else 100
|
max_files_value = max_files if max_files is not None else 100
|
||||||
|
|
||||||
|
|
@ -349,6 +359,14 @@ class OneDriveConnector(BaseConnector):
|
||||||
response = await self._make_graph_request(url, params=params)
|
response = await self._make_graph_request(url, params=params)
|
||||||
item = response.json()
|
item = response.json()
|
||||||
|
|
||||||
|
# Check if it's a folder
|
||||||
|
if item.get("folder"):
|
||||||
|
return {
|
||||||
|
"id": file_id,
|
||||||
|
"name": item.get("name", ""),
|
||||||
|
"isFolder": True,
|
||||||
|
}
|
||||||
|
|
||||||
if item.get("file"):
|
if item.get("file"):
|
||||||
return {
|
return {
|
||||||
"id": file_id,
|
"id": file_id,
|
||||||
|
|
@ -360,6 +378,7 @@ class OneDriveConnector(BaseConnector):
|
||||||
"mime_type": item.get("file", {}).get("mimeType", self._get_mime_type(item.get("name", ""))),
|
"mime_type": item.get("file", {}).get("mimeType", self._get_mime_type(item.get("name", ""))),
|
||||||
"url": item.get("webUrl", ""),
|
"url": item.get("webUrl", ""),
|
||||||
"download_url": item.get("@microsoft.graph.downloadUrl"),
|
"download_url": item.get("@microsoft.graph.downloadUrl"),
|
||||||
|
"isFolder": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
@ -429,6 +448,62 @@ class OneDriveConnector(BaseConnector):
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
async def _list_selected_files(self) -> Dict[str, Any]:
|
||||||
|
"""List only selected files/folders (selective sync)."""
|
||||||
|
files: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
# Process selected file IDs
|
||||||
|
if self.cfg.file_ids:
|
||||||
|
for file_id in self.cfg.file_ids:
|
||||||
|
try:
|
||||||
|
file_meta = await self._get_file_metadata_by_id(file_id)
|
||||||
|
if file_meta and not file_meta.get('isFolder', False):
|
||||||
|
files.append(file_meta)
|
||||||
|
elif file_meta and file_meta.get('isFolder', False):
|
||||||
|
# If it's a folder, expand its contents
|
||||||
|
folder_files = await self._list_folder_contents(file_id)
|
||||||
|
files.extend(folder_files)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to get file {file_id}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Process selected folder IDs
|
||||||
|
if self.cfg.folder_ids:
|
||||||
|
for folder_id in self.cfg.folder_ids:
|
||||||
|
try:
|
||||||
|
folder_files = await self._list_folder_contents(folder_id)
|
||||||
|
files.extend(folder_files)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to list folder {folder_id}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
return {"files": files, "next_page_token": None}
|
||||||
|
|
||||||
|
async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
|
||||||
|
"""List all files in a folder recursively."""
|
||||||
|
files: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"
|
||||||
|
params = dict(self._default_params)
|
||||||
|
|
||||||
|
response = await self._make_graph_request(url, params=params)
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
items = data.get("value", [])
|
||||||
|
for item in items:
|
||||||
|
if item.get("file"): # It's a file
|
||||||
|
file_meta = await self._get_file_metadata_by_id(item.get("id"))
|
||||||
|
if file_meta:
|
||||||
|
files.append(file_meta)
|
||||||
|
elif item.get("folder"): # It's a subfolder, recurse
|
||||||
|
subfolder_files = await self._list_folder_contents(item.get("id"))
|
||||||
|
files.extend(subfolder_files)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to list folder contents for {folder_id}: {e}")
|
||||||
|
|
||||||
|
return files
|
||||||
|
|
||||||
def _get_mime_type(self, filename: str) -> str:
|
def _get_mime_type(self, filename: str) -> str:
|
||||||
"""Get MIME type based on file extension."""
|
"""Get MIME type based on file extension."""
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,12 @@ class SharePointConnector(BaseConnector):
|
||||||
"$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
|
"$select": "id,name,size,lastModifiedDateTime,createdDateTime,webUrl,file,folder,@microsoft.graph.downloadUrl"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Selective sync support (similar to Google Drive and OneDrive)
|
||||||
|
self.cfg = type('SharePointConfig', (), {
|
||||||
|
'file_ids': config.get('file_ids') or config.get('selected_files') or config.get('selected_file_ids'),
|
||||||
|
'folder_ids': config.get('folder_ids') or config.get('selected_folders') or config.get('selected_folder_ids'),
|
||||||
|
})()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _graph_base_url(self) -> str:
|
def _graph_base_url(self) -> str:
|
||||||
"""Base URL for Microsoft Graph API calls"""
|
"""Base URL for Microsoft Graph API calls"""
|
||||||
|
|
@ -293,6 +299,10 @@ class SharePointConnector(BaseConnector):
|
||||||
if not await self.authenticate():
|
if not await self.authenticate():
|
||||||
raise RuntimeError("SharePoint authentication failed during file listing")
|
raise RuntimeError("SharePoint authentication failed during file listing")
|
||||||
|
|
||||||
|
# If file_ids or folder_ids are specified in config, use selective sync
|
||||||
|
if self.cfg.file_ids or self.cfg.folder_ids:
|
||||||
|
return await self._list_selected_files()
|
||||||
|
|
||||||
files = []
|
files = []
|
||||||
max_files_value = max_files if max_files is not None else 100
|
max_files_value = max_files if max_files is not None else 100
|
||||||
|
|
||||||
|
|
@ -426,6 +436,14 @@ class SharePointConnector(BaseConnector):
|
||||||
"download_url": item.get("@microsoft.graph.downloadUrl")
|
"download_url": item.get("@microsoft.graph.downloadUrl")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Check if it's a folder
|
||||||
|
if item.get("folder"):
|
||||||
|
return {
|
||||||
|
"id": file_id,
|
||||||
|
"name": item.get("name", ""),
|
||||||
|
"isFolder": True,
|
||||||
|
}
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -453,6 +471,67 @@ class SharePointConnector(BaseConnector):
|
||||||
logger.error(f"Failed to download file content for {file_id}: {e}")
|
logger.error(f"Failed to download file content for {file_id}: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
async def _list_selected_files(self) -> Dict[str, Any]:
|
||||||
|
"""List only selected files/folders (selective sync)."""
|
||||||
|
files: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
# Process selected file IDs
|
||||||
|
if self.cfg.file_ids:
|
||||||
|
for file_id in self.cfg.file_ids:
|
||||||
|
try:
|
||||||
|
file_meta = await self._get_file_metadata_by_id(file_id)
|
||||||
|
if file_meta and not file_meta.get('isFolder', False):
|
||||||
|
files.append(file_meta)
|
||||||
|
elif file_meta and file_meta.get('isFolder', False):
|
||||||
|
# If it's a folder, expand its contents
|
||||||
|
folder_files = await self._list_folder_contents(file_id)
|
||||||
|
files.extend(folder_files)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to get file {file_id}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Process selected folder IDs
|
||||||
|
if self.cfg.folder_ids:
|
||||||
|
for folder_id in self.cfg.folder_ids:
|
||||||
|
try:
|
||||||
|
folder_files = await self._list_folder_contents(folder_id)
|
||||||
|
files.extend(folder_files)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to list folder {folder_id}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
return {"files": files, "next_page_token": None}
|
||||||
|
|
||||||
|
async def _list_folder_contents(self, folder_id: str) -> List[Dict[str, Any]]:
|
||||||
|
"""List all files in a folder recursively."""
|
||||||
|
files: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
site_info = self._parse_sharepoint_url()
|
||||||
|
if site_info:
|
||||||
|
url = f"{self._graph_base_url}/sites/{site_info['host_name']}:/sites/{site_info['site_name']}:/drive/items/{folder_id}/children"
|
||||||
|
else:
|
||||||
|
url = f"{self._graph_base_url}/me/drive/items/{folder_id}/children"
|
||||||
|
|
||||||
|
params = dict(self._default_params)
|
||||||
|
|
||||||
|
response = await self._make_graph_request(url, params=params)
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
items = data.get("value", [])
|
||||||
|
for item in items:
|
||||||
|
if item.get("file"): # It's a file
|
||||||
|
file_meta = await self._get_file_metadata_by_id(item.get("id"))
|
||||||
|
if file_meta:
|
||||||
|
files.append(file_meta)
|
||||||
|
elif item.get("folder"): # It's a subfolder, recurse
|
||||||
|
subfolder_files = await self._list_folder_contents(item.get("id"))
|
||||||
|
files.extend(subfolder_files)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to list folder contents for {folder_id}: {e}")
|
||||||
|
|
||||||
|
return files
|
||||||
|
|
||||||
async def _download_file_from_url(self, download_url: str) -> bytes:
|
async def _download_file_from_url(self, download_url: str) -> bytes:
|
||||||
"""Download file content from direct download URL"""
|
"""Download file content from direct download URL"""
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue