Fix list_files interface for connectors

This commit is contained in:
Eric Hare 2025-09-25 13:37:28 -07:00
parent f03889a2b3
commit 275a89a675
No known key found for this signature in database
GPG key ID: A73DF73724270AB7
3 changed files with 22 additions and 15 deletions

View file

@ -477,7 +477,7 @@ class GoogleDriveConnector(BaseConnector):
"next_page_token": None, # no more pages "next_page_token": None, # no more pages
} }
except Exception as e: except Exception as e:
# Optionally log error with your base class logger # Log the error
try: try:
logger.error(f"GoogleDriveConnector.list_files failed: {e}") logger.error(f"GoogleDriveConnector.list_files failed: {e}")
except Exception: except Exception:
@ -495,7 +495,6 @@ class GoogleDriveConnector(BaseConnector):
try: try:
blob = self._download_file_bytes(meta) blob = self._download_file_bytes(meta)
except Exception as e: except Exception as e:
# Use your base class logger if available
try: try:
logger.error(f"Download failed for {file_id}: {e}") logger.error(f"Download failed for {file_id}: {e}")
except Exception: except Exception:
@ -562,7 +561,6 @@ class GoogleDriveConnector(BaseConnector):
if not self.cfg.changes_page_token: if not self.cfg.changes_page_token:
self.cfg.changes_page_token = self.get_start_page_token() self.cfg.changes_page_token = self.get_start_page_token()
except Exception as e: except Exception as e:
# Optional: use your base logger
try: try:
logger.error(f"Failed to get start page token: {e}") logger.error(f"Failed to get start page token: {e}")
except Exception: except Exception:
@ -593,7 +591,6 @@ class GoogleDriveConnector(BaseConnector):
expiration = result.get("expiration") expiration = result.get("expiration")
# Persist in-memory so cleanup can stop this channel later. # Persist in-memory so cleanup can stop this channel later.
# If your project has a persistence layer, save these values there.
self._active_channel = { self._active_channel = {
"channel_id": channel_id, "channel_id": channel_id,
"resource_id": resource_id, "resource_id": resource_id,
@ -803,7 +800,7 @@ class GoogleDriveConnector(BaseConnector):
""" """
Perform a one-shot sync of the currently selected scope and emit documents. Perform a one-shot sync of the currently selected scope and emit documents.
Emits ConnectorDocument instances (adapt to your BaseConnector ingestion). Emits ConnectorDocument instances
""" """
items = self._iter_selected_items() items = self._iter_selected_items()
for meta in items: for meta in items:

View file

@ -23,6 +23,8 @@ class OneDriveConnector(BaseConnector):
CONNECTOR_ICON = "onedrive" CONNECTOR_ICON = "onedrive"
def __init__(self, config: Dict[str, Any]): def __init__(self, config: Dict[str, Any]):
super().__init__(config)
logger.debug(f"OneDrive connector __init__ called with config type: {type(config)}") logger.debug(f"OneDrive connector __init__ called with config type: {type(config)}")
logger.debug(f"OneDrive connector __init__ config value: {config}") logger.debug(f"OneDrive connector __init__ config value: {config}")
@ -41,7 +43,7 @@ class OneDriveConnector(BaseConnector):
# Initialize with defaults that allow the connector to be listed # Initialize with defaults that allow the connector to be listed
self.client_id = None self.client_id = None
self.client_secret = None self.client_secret = None
self.redirect_uri = config.get("redirect_uri", "http://localhost") # must match your app registration self.redirect_uri = config.get("redirect_uri", "http://localhost")
# Try to get credentials, but don't fail if they're missing # Try to get credentials, but don't fail if they're missing
try: try:
@ -100,7 +102,7 @@ class OneDriveConnector(BaseConnector):
return f"https://graph.microsoft.com/{self._graph_api_version}" return f"https://graph.microsoft.com/{self._graph_api_version}"
def emit(self, doc: ConnectorDocument) -> None: def emit(self, doc: ConnectorDocument) -> None:
"""Emit a ConnectorDocument instance (integrate with your pipeline here).""" """Emit a ConnectorDocument instance."""
logger.debug(f"Emitting OneDrive document: {doc.id} ({doc.filename})") logger.debug(f"Emitting OneDrive document: {doc.id} ({doc.filename})")
async def authenticate(self) -> bool: async def authenticate(self) -> bool:
@ -238,7 +240,12 @@ class OneDriveConnector(BaseConnector):
expiry = datetime.utcnow() + timedelta(days=3) expiry = datetime.utcnow() + timedelta(days=3)
return expiry.strftime("%Y-%m-%dT%H:%M:%S.%fZ") return expiry.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
async def list_files(self, page_token: Optional[str] = None, max_files: Optional[int] = None) -> Dict[str, Any]: async def list_files(
self,
page_token: Optional[str] = None,
max_files: Optional[int] = None,
**kwargs
) -> Dict[str, Any]:
"""List files from OneDrive using Microsoft Graph.""" """List files from OneDrive using Microsoft Graph."""
try: try:
if not await self.authenticate(): if not await self.authenticate():
@ -250,7 +257,7 @@ class OneDriveConnector(BaseConnector):
base_url = f"{self._graph_base_url}/me/drive/root/children" base_url = f"{self._graph_base_url}/me/drive/root/children"
params = dict(self._default_params) params = dict(self._default_params)
params["$top"] = max_files_value params["$top"] = str(max_files_value)
if page_token: if page_token:
params["$skiptoken"] = page_token params["$skiptoken"] = page_token

View file

@ -24,9 +24,8 @@ class SharePointConnector(BaseConnector):
CONNECTOR_ICON = "sharepoint" CONNECTOR_ICON = "sharepoint"
def __init__(self, config: Dict[str, Any]): def __init__(self, config: Dict[str, Any]):
super().__init__(config) # Fix: Call parent init first super().__init__(config)
def __init__(self, config: Dict[str, Any]):
logger.debug(f"SharePoint connector __init__ called with config type: {type(config)}") logger.debug(f"SharePoint connector __init__ called with config type: {type(config)}")
logger.debug(f"SharePoint connector __init__ config value: {config}") logger.debug(f"SharePoint connector __init__ config value: {config}")
@ -110,7 +109,6 @@ class SharePointConnector(BaseConnector):
def emit(self, doc: ConnectorDocument) -> None: def emit(self, doc: ConnectorDocument) -> None:
""" """
Emit a ConnectorDocument instance. Emit a ConnectorDocument instance.
Override this method to integrate with your ingestion pipeline.
""" """
logger.debug(f"Emitting SharePoint document: {doc.id} ({doc.filename})") logger.debug(f"Emitting SharePoint document: {doc.id} ({doc.filename})")
@ -283,7 +281,12 @@ class SharePointConnector(BaseConnector):
return None return None
async def list_files(self, page_token: Optional[str] = None, max_files: Optional[int] = None) -> Dict[str, Any]: async def list_files(
self,
page_token: Optional[str] = None,
max_files: Optional[int] = None,
**kwargs
) -> Dict[str, Any]:
"""List all files using Microsoft Graph API - BaseConnector interface""" """List all files using Microsoft Graph API - BaseConnector interface"""
try: try:
# Ensure authentication # Ensure authentication
@ -301,7 +304,7 @@ class SharePointConnector(BaseConnector):
base_url = f"{self._graph_base_url}/me/drive/root/children" base_url = f"{self._graph_base_url}/me/drive/root/children"
params = dict(self._default_params) params = dict(self._default_params)
params["$top"] = max_files_value params["$top"] = str(max_files_value)
if page_token: if page_token:
params["$skiptoken"] = page_token params["$skiptoken"] = page_token