From 275a89a675fdd3a2594a02db35860b3f05f8927f Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Thu, 25 Sep 2025 13:37:28 -0700 Subject: [PATCH] Fix list_files interface for connectors --- src/connectors/google_drive/connector.py | 7 ++----- src/connectors/onedrive/connector.py | 15 +++++++++++---- src/connectors/sharepoint/connector.py | 15 +++++++++------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/connectors/google_drive/connector.py b/src/connectors/google_drive/connector.py index 0aa4234a..37ebdc8a 100644 --- a/src/connectors/google_drive/connector.py +++ b/src/connectors/google_drive/connector.py @@ -477,7 +477,7 @@ class GoogleDriveConnector(BaseConnector): "next_page_token": None, # no more pages } except Exception as e: - # Optionally log error with your base class logger + # Log the error try: logger.error(f"GoogleDriveConnector.list_files failed: {e}") except Exception: @@ -495,7 +495,6 @@ class GoogleDriveConnector(BaseConnector): try: blob = self._download_file_bytes(meta) except Exception as e: - # Use your base class logger if available try: logger.error(f"Download failed for {file_id}: {e}") except Exception: @@ -562,7 +561,6 @@ class GoogleDriveConnector(BaseConnector): if not self.cfg.changes_page_token: self.cfg.changes_page_token = self.get_start_page_token() except Exception as e: - # Optional: use your base logger try: logger.error(f"Failed to get start page token: {e}") except Exception: @@ -593,7 +591,6 @@ class GoogleDriveConnector(BaseConnector): expiration = result.get("expiration") # Persist in-memory so cleanup can stop this channel later. - # If your project has a persistence layer, save these values there. self._active_channel = { "channel_id": channel_id, "resource_id": resource_id, @@ -803,7 +800,7 @@ class GoogleDriveConnector(BaseConnector): """ Perform a one-shot sync of the currently selected scope and emit documents. - Emits ConnectorDocument instances (adapt to your BaseConnector ingestion). + Emits ConnectorDocument instances """ items = self._iter_selected_items() for meta in items: diff --git a/src/connectors/onedrive/connector.py b/src/connectors/onedrive/connector.py index bea1e790..3ef4bdaf 100644 --- a/src/connectors/onedrive/connector.py +++ b/src/connectors/onedrive/connector.py @@ -23,6 +23,8 @@ class OneDriveConnector(BaseConnector): CONNECTOR_ICON = "onedrive" def __init__(self, config: Dict[str, Any]): + super().__init__(config) + logger.debug(f"OneDrive connector __init__ called with config type: {type(config)}") logger.debug(f"OneDrive connector __init__ config value: {config}") @@ -41,7 +43,7 @@ class OneDriveConnector(BaseConnector): # Initialize with defaults that allow the connector to be listed self.client_id = None self.client_secret = None - self.redirect_uri = config.get("redirect_uri", "http://localhost") # must match your app registration + self.redirect_uri = config.get("redirect_uri", "http://localhost") # Try to get credentials, but don't fail if they're missing try: @@ -100,7 +102,7 @@ class OneDriveConnector(BaseConnector): return f"https://graph.microsoft.com/{self._graph_api_version}" def emit(self, doc: ConnectorDocument) -> None: - """Emit a ConnectorDocument instance (integrate with your pipeline here).""" + """Emit a ConnectorDocument instance.""" logger.debug(f"Emitting OneDrive document: {doc.id} ({doc.filename})") async def authenticate(self) -> bool: @@ -238,7 +240,12 @@ class OneDriveConnector(BaseConnector): expiry = datetime.utcnow() + timedelta(days=3) return expiry.strftime("%Y-%m-%dT%H:%M:%S.%fZ") - async def list_files(self, page_token: Optional[str] = None, max_files: Optional[int] = None) -> Dict[str, Any]: + async def list_files( + self, + page_token: Optional[str] = None, + max_files: Optional[int] = None, + **kwargs + ) -> Dict[str, Any]: """List files from OneDrive using Microsoft Graph.""" try: if not await self.authenticate(): @@ -250,7 +257,7 @@ class OneDriveConnector(BaseConnector): base_url = f"{self._graph_base_url}/me/drive/root/children" params = dict(self._default_params) - params["$top"] = max_files_value + params["$top"] = str(max_files_value) if page_token: params["$skiptoken"] = page_token diff --git a/src/connectors/sharepoint/connector.py b/src/connectors/sharepoint/connector.py index 3b9769e7..f8283062 100644 --- a/src/connectors/sharepoint/connector.py +++ b/src/connectors/sharepoint/connector.py @@ -22,11 +22,10 @@ class SharePointConnector(BaseConnector): CONNECTOR_NAME = "SharePoint" CONNECTOR_DESCRIPTION = "Connect to SharePoint to sync documents and files" CONNECTOR_ICON = "sharepoint" - - def __init__(self, config: Dict[str, Any]): - super().__init__(config) # Fix: Call parent init first def __init__(self, config: Dict[str, Any]): + super().__init__(config) + logger.debug(f"SharePoint connector __init__ called with config type: {type(config)}") logger.debug(f"SharePoint connector __init__ config value: {config}") @@ -110,7 +109,6 @@ class SharePointConnector(BaseConnector): def emit(self, doc: ConnectorDocument) -> None: """ Emit a ConnectorDocument instance. - Override this method to integrate with your ingestion pipeline. """ logger.debug(f"Emitting SharePoint document: {doc.id} ({doc.filename})") @@ -283,7 +281,12 @@ class SharePointConnector(BaseConnector): return None - async def list_files(self, page_token: Optional[str] = None, max_files: Optional[int] = None) -> Dict[str, Any]: + async def list_files( + self, + page_token: Optional[str] = None, + max_files: Optional[int] = None, + **kwargs + ) -> Dict[str, Any]: """List all files using Microsoft Graph API - BaseConnector interface""" try: # Ensure authentication @@ -301,7 +304,7 @@ class SharePointConnector(BaseConnector): base_url = f"{self._graph_base_url}/me/drive/root/children" params = dict(self._default_params) - params["$top"] = max_files_value + params["$top"] = str(max_files_value) if page_token: params["$skiptoken"] = page_token