Merge branch 'main' into add-mcp-agent-flows

2025-10-06 23:17:14 -04:00 · 2025-10-06 23:17:14 -04:00 · bfd0bc2c87
commit bfd0bc2c87
parent eb1acde7df 46c0334c3f
8 changed files with 301 additions and 87 deletions
--- a/frontend/src/app/upload/[provider]/page.tsx
+++ b/frontend/src/app/upload/[provider]/page.tsx
@ -165,7 +165,7 @@ export default function UploadProviderPage() {
  const handleFileSelected = (files: CloudFile[]) => {
    setSelectedFiles(files);
-    console.log(`Selected ${files.length} files from ${provider}:`, files);
+    console.log(`Selected ${files.length} item(s) from ${provider}:`, files);
    // You can add additional handling here like triggering sync, etc.
  };
@ -376,19 +376,19 @@ export default function UploadProviderPage() {
                loading={isIngesting}
                disabled={!hasSelectedFiles || isIngesting}
              >
-                {!hasSelectedFiles ? (
+                {hasSelectedFiles ? (
                  <>Ingest files</>
                ) : (
                  <>
-                    Ingest {selectedFiles.length} file
+                    Ingest {selectedFiles.length} item
                    {selectedFiles.length > 1 ? "s" : ""}
                  </>
                ) : (
                  <>Ingest selected items</>
                )}
              </Button>
            </TooltipTrigger>
            {!hasSelectedFiles ? (
              <TooltipContent side="left">
-                Select at least one file before ingesting
+                Select at least one item before ingesting
              </TooltipContent>
            ) : null}
          </Tooltip>
--- a/frontend/src/components/cloud-connectors-dialog.tsx
+++ b/frontend/src/components/cloud-connectors-dialog.tsx
@ -201,7 +201,7 @@ export function CloudConnectorsDialog({
        <DialogHeader>
          <DialogTitle>Cloud File Connectors</DialogTitle>
          <DialogDescription>
-            Select files from your connected cloud storage providers
+            Select files or folders from your connected cloud storage providers
          </DialogDescription>
        </DialogHeader>
@ -232,7 +232,7 @@ export function CloudConnectorsDialog({
                        !connector.hasAccessToken
                          ? connector.accessTokenError ||
                            "Access token required - try reconnecting your account"
-                          : `Select files from ${connector.name}`
+                          : `Select files or folders from ${connector.name}`
                      }
                      onClick={e => {
                        e.preventDefault();
--- a/frontend/src/components/cloud-picker/file-list.tsx
+++ b/frontend/src/components/cloud-picker/file-list.tsx
@ -26,7 +26,7 @@ export const FileList = ({
  return (
    <div className="space-y-2 relative">
      <div className="flex items-center justify-between">
-        <p className="text-sm font-medium">Added files ({files.length})</p>
+        <p className="text-sm font-medium">Selected items ({files.length})</p>
        <Button
          ignoreTitleCase={true}
          onClick={onClearAll}
--- a/frontend/src/components/cloud-picker/picker-header.tsx
+++ b/frontend/src/components/cloud-picker/picker-header.tsx
@ -39,7 +39,7 @@ export const PickerHeader = ({
    return (
      <div className="text-sm text-muted-foreground p-4 bg-muted/20 rounded-md">
        Please connect to {getProviderName(provider)} first to select specific
-        files.
+        files or folders.
      </div>
    );
  }
@ -48,7 +48,7 @@ export const PickerHeader = ({
    <Card>
      <CardContent className="flex flex-col items-center text-center py-8">
        <p className="text-sm text-primary mb-4">
-          Select files from {getProviderName(provider)} to ingest.
+          Select files or folders from {getProviderName(provider)} to ingest.
        </p>
        <Button
          onClick={onAddFiles}
@ -56,7 +56,7 @@ export const PickerHeader = ({
          className="bg-foreground text-background hover:bg-foreground/90 font-semibold"
        >
          <Plus className="h-4 w-4" />
-          {isPickerOpen ? "Opening picker..." : "Add files"}
+          {isPickerOpen ? "Opening picker..." : "Add files or folders"}
        </Button>
      </CardContent>
    </Card>
--- a/frontend/src/components/cloud-picker/provider-handlers.ts
+++ b/frontend/src/components/cloud-picker/provider-handlers.ts
@ -52,12 +52,16 @@ export class GoogleDriveHandler {
    try {
      this.onPickerStateChange?.(true);
      // Create a view for regular documents
      const docsView = new window.google.picker.DocsView()
        .setIncludeFolders(true)
        .setSelectFolderEnabled(true);
      const picker = new window.google.picker.PickerBuilder()
-        .addView(window.google.picker.ViewId.DOCS)
+        .addView(docsView)
        .addView(window.google.picker.ViewId.FOLDERS)
        .setOAuthToken(this.accessToken)
        .enableFeature(window.google.picker.Feature.MULTISELECT_ENABLED)
-        .setTitle("Select files from Google Drive")
+        .setTitle("Select files or folders from Google Drive")
        .setCallback(data => this.pickerCallback(data, onFileSelected))
        .build();
--- a/src/connectors/google_drive/connector.py
+++ b/src/connectors/google_drive/connector.py
@ -1,21 +1,20 @@
 import io
 import os
 from pathlib import Path
 import time
 from collections import deque
 from dataclasses import dataclass
-from typing import Dict, List, Any, Optional, Iterable, Set
+from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Set
 from googleapiclient.errors import HttpError
 from googleapiclient.http import MediaIoBaseDownload
 from utils.logging_config import get_logger
 logger = get_logger(__name__)
 # Project-specific base types (adjust imports to your project)
 from ..base import BaseConnector, ConnectorDocument, DocumentACL
 from .oauth import GoogleDriveOAuth
 logger = get_logger(__name__)
 # -------------------------
 # Config model
@ -32,8 +31,8 @@ class GoogleDriveConfig:
    recursive: bool = True
    # Shared Drives control
-    drive_id: Optional[str] = None        # when set, we use corpora='drive'
+    drive_id: Optional[str] = None  # when set, we use corpora='drive'
-    corpora: Optional[str] = None         # 'user' | 'drive' | 'domain'; auto-picked if None
+    corpora: Optional[str] = None  # 'user' | 'drive' | 'domain'; auto-picked if None
    # Optional filtering
    include_mime_types: Optional[List[str]] = None
@ -80,7 +79,6 @@ class GoogleDriveConnector(BaseConnector):
    _FILE_ID_ALIASES = ("file_ids", "selected_file_ids", "selected_files")
    _FOLDER_ID_ALIASES = ("folder_ids", "selected_folder_ids", "selected_folders")
    def emit(self, doc: ConnectorDocument) -> None:
        """
        Emit a ConnectorDocument instance.
@ -100,7 +98,9 @@ class GoogleDriveConnector(BaseConnector):
        # Token file default (so callback & workers don’t need to pass it)
        project_root = Path(__file__).resolve().parent.parent.parent.parent
-        token_file = config.get("token_file") or str(project_root / "google_drive_token.json")
+        token_file = config.get("token_file") or str(
            project_root / "google_drive_token.json"
        )
        Path(token_file).parent.mkdir(parents=True, exist_ok=True)
        if not isinstance(client_id, str) or not client_id.strip():
@ -115,7 +115,9 @@ class GoogleDriveConnector(BaseConnector):
            )
        # Normalize incoming IDs from any of the supported alias keys
-        def _first_present_list(cfg: Dict[str, Any], keys: Iterable[str]) -> Optional[List[str]]:
+        def _first_present_list(
            cfg: Dict[str, Any], keys: Iterable[str]
        ) -> Optional[List[str]]:
            for k in keys:
                v = cfg.get(k)
                if v:  # accept non-empty list
@ -151,6 +153,7 @@ class GoogleDriveConnector(BaseConnector):
        # Drive client is built in authenticate()
        from google.oauth2.credentials import Credentials
        self.creds: Optional[Credentials] = None
        self.service: Any = None
@ -214,7 +217,7 @@ class GoogleDriveConnector(BaseConnector):
                        "id, name, mimeType, modifiedTime, createdTime, size, "
                        "webViewLink, parents, owners, driveId"
                    ),
-                    **self._drives_flags,
+                    **self._drives_get_flags,
                )
                .execute()
            )
@ -285,7 +288,9 @@ class GoogleDriveConnector(BaseConnector):
        Fetch metadata for a file by ID (resolving shortcuts).
        """
        if self.service is None:
-            raise RuntimeError("Google Drive service is not initialized. Please authenticate first.")
+            raise RuntimeError(
                "Google Drive service is not initialized. Please authenticate first."
            )
        try:
            meta = (
                self.service.files()
@ -323,24 +328,40 @@ class GoogleDriveConnector(BaseConnector):
    def _iter_selected_items(self) -> List[Dict[str, Any]]:
        """
        Return a de-duplicated list of file metadata for the selected scope:
-          - explicit file_ids
+          - explicit file_ids (automatically expands folders to their contents)
          - items inside folder_ids (with optional recursion)
        Shortcuts are resolved to their targets automatically.
        """
        seen: Set[str] = set()
        items: List[Dict[str, Any]] = []
        folders_to_expand: List[str] = []
-        # Explicit files
+        # Process file_ids: separate actual files from folders
        if self.cfg.file_ids:
            for fid in self.cfg.file_ids:
                meta = self._get_file_meta_by_id(fid)
-                if meta and meta["id"] not in seen:
+                if not meta:
                    continue
                # If it's a folder, add to folders_to_expand instead
                if meta.get("mimeType") == "application/vnd.google-apps.folder":
                    logger.debug(
                        f"Item {fid} ({meta.get('name')}) is a folder, "
                        f"will expand to contents"
                    )
                    folders_to_expand.append(fid)
                elif meta["id"] not in seen:
                    # It's a regular file, add it directly
                    seen.add(meta["id"])
                    items.append(meta)
-        # Folders
+        # Collect all folders to expand (from both file_ids and folder_ids)
        if self.cfg.folder_ids:
-            folder_children = self._bfs_expand_folders(self.cfg.folder_ids)
+            folders_to_expand.extend(self.cfg.folder_ids)
        # Expand all folders to their contents
        if folders_to_expand:
            folder_children = self._bfs_expand_folders(folders_to_expand)
            for meta in folder_children:
                meta = self._resolve_shortcut(meta)
                if meta.get("id") in seen:
@ -357,7 +378,11 @@ class GoogleDriveConnector(BaseConnector):
        items = self._filter_by_mime(items)
        # Exclude folders from final emits:
-        items = [m for m in items if m.get("mimeType") != "application/vnd.google-apps.folder"]
+        items = [
            m
            for m in items
            if m.get("mimeType") != "application/vnd.google-apps.folder"
        ]
        return items
    # -------------------------
@ -389,29 +414,85 @@ class GoogleDriveConnector(BaseConnector):
    def _download_file_bytes(self, file_meta: Dict[str, Any]) -> bytes:
        """
        Download bytes for a given file (exporting if Google-native).
        Raises ValueError if the item is a folder (folders cannot be downloaded).
        """
        file_id = file_meta["id"]
        file_name = file_meta.get("name", "unknown")
        mime_type = file_meta.get("mimeType") or ""
-        # Google-native: export
+        logger.debug(
-        export_mime = self._pick_export_mime(mime_type)
+            f"Downloading file {file_id} ({file_name}) with mimetype: {mime_type}"
-        if mime_type.startswith("application/vnd.google-apps."):
+        )
-            # default fallback if not overridden
+
-            #if not export_mime:
+        # Folders cannot be downloaded or exported - this should never be reached
-            #    export_mime = "application/pdf"
+        # as folders are automatically expanded in _iter_selected_items()
-            export_mime = "application/pdf"
+        if mime_type == "application/vnd.google-apps.folder":
            raise ValueError(
                f"Cannot download folder {file_id} ({file_name}). "
                f"This is a bug - folders should be automatically expanded before download."
            )
        # According to https://stackoverflow.com/questions/65053558/google-drive-api-v3-files-export-method-throws-a-403-error-export-only-support
        # export_media ONLY works for Google Docs Editors files (Docs, Sheets, Slides, Drawings)
        # All other files (including other Google Apps types like Forms, Sites, Maps) must use get_media
        # Define which Google Workspace files are exportable
        exportable_types = {
            "application/vnd.google-apps.document",  # Google Docs
            "application/vnd.google-apps.spreadsheet",  # Google Sheets
            "application/vnd.google-apps.presentation",  # Google Slides
            "application/vnd.google-apps.drawing",  # Google Drawings
        }
        if mime_type in exportable_types:
            # This is an exportable Google Workspace file - must use export_media
            export_mime = self._pick_export_mime(mime_type)
            if not export_mime:
                # Default fallback for unsupported Google native types
                export_mime = "application/pdf"
            logger.debug(
                f"Using export_media for {file_id} ({mime_type} -> {export_mime})"
            )
            # NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
-            request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
+            request = self.service.files().export_media(
                fileId=file_id, mimeType=export_mime
            )
        else:
            # This is a regular uploaded file (PDF, image, video, etc.) - use get_media
            # Also handles non-exportable Google Apps files (Forms, Sites, Maps, etc.)
            logger.debug(f"Using get_media for {file_id} ({mime_type})")
            # Binary download (get_media also doesn't accept the Drive flags)
            request = self.service.files().get_media(fileId=file_id)
        # Download the file with error handling for misclassified Google Docs
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
        done = False
-        while not done:
+
-            status, done = downloader.next_chunk()
+        try:
-            # Optional: you can log progress via status.progress()
+            while not done:
                status, done = downloader.next_chunk()
                # Optional: you can log progress via status.progress()
        except HttpError as e:
            # If download fails with "fileNotDownloadable", it's a Docs Editor file
            # that wasn't properly detected. Retry with export_media.
            if "fileNotDownloadable" in str(e) and mime_type not in exportable_types:
                logger.warning(
                    f"Download failed for {file_id} ({mime_type}) with fileNotDownloadable error. "
                    f"Retrying with export_media (file might be a Google Doc)"
                )
                export_mime = "application/pdf"
                request = self.service.files().export_media(
                    fileId=file_id, mimeType=export_mime
                )
                fh = io.BytesIO()
                downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
            else:
                raise
        return fh.getvalue()
@ -430,7 +511,9 @@ class GoogleDriveConnector(BaseConnector):
            # If still not authenticated, bail (caller should kick off OAuth init)
            if not await self.oauth.is_authenticated():
-                logger.debug("authenticate: no valid credentials; run OAuth init/callback first.")
+                logger.debug(
                    "authenticate: no valid credentials; run OAuth init/callback first."
                )
                return False
            # Build Drive service from OAuth helper
@ -450,7 +533,7 @@ class GoogleDriveConnector(BaseConnector):
        self,
        page_token: Optional[str] = None,
        max_files: Optional[int] = None,
-        **kwargs
+        **kwargs,
    ) -> Dict[str, Any]:
        """
        List files in the currently selected scope (file_ids/folder_ids/recursive).
@ -487,11 +570,20 @@ class GoogleDriveConnector(BaseConnector):
    async def get_file_content(self, file_id: str) -> ConnectorDocument:
        """
        Fetch a file's metadata and content from Google Drive and wrap it in a ConnectorDocument.
        Raises FileNotFoundError if the ID is a folder (folders cannot be downloaded).
        """
        meta = self._get_file_meta_by_id(file_id)
        if not meta:
            raise FileNotFoundError(f"Google Drive file not found: {file_id}")
        # Check if this is a folder - folders cannot be downloaded
        if meta.get("mimeType") == "application/vnd.google-apps.folder":
            raise FileNotFoundError(
                f"Cannot download folder {file_id} ({meta.get('name')}). "
                f"Folders must be expanded to list their contents. "
                f"This ID should not have been passed to get_file_content()."
            )
        try:
            blob = self._download_file_bytes(meta)
        except Exception as e:
@ -527,7 +619,9 @@ class GoogleDriveConnector(BaseConnector):
            metadata={
                "parents": meta.get("parents"),
                "driveId": meta.get("driveId"),
-                "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                "size": int(meta.get("size", 0))
                if str(meta.get("size", "")).isdigit()
                else None,
            },
        )
        return doc
@ -546,10 +640,14 @@ class GoogleDriveConnector(BaseConnector):
        # 1) Ensure we are authenticated and have a live Drive service
        ok = await self.authenticate()
        if not ok:
-            raise RuntimeError("GoogleDriveConnector.setup_subscription: not authenticated")
+            raise RuntimeError(
                "GoogleDriveConnector.setup_subscription: not authenticated"
            )
        # 2) Resolve webhook address (no param in ABC, so pull from config/env)
-        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv("GOOGLE_DRIVE_WEBHOOK_URL")
+        webhook_address = getattr(self.cfg, "webhook_address", None) or os.getenv(
            "GOOGLE_DRIVE_WEBHOOK_URL"
        )
        if not webhook_address:
            raise RuntimeError(
                "GoogleDriveConnector.setup_subscription: webhook URL not configured. "
@ -600,7 +698,9 @@ class GoogleDriveConnector(BaseConnector):
            }
            if not isinstance(channel_id, str) or not channel_id:
-                raise RuntimeError(f"Drive watch returned invalid channel id: {channel_id!r}")
+                raise RuntimeError(
                    f"Drive watch returned invalid channel id: {channel_id!r}"
                )
            return channel_id
@ -665,13 +765,20 @@ class GoogleDriveConnector(BaseConnector):
            return False
        try:
-            self.service.channels().stop(body={"id": subscription_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
                body={"id": subscription_id, "resourceId": resource_id}
            ).execute()
            # 4) Clear local bookkeeping
-            if getattr(self, "_active_channel", None) and self._active_channel.get("channel_id") == subscription_id:
+            if (
                getattr(self, "_active_channel", None)
                and self._active_channel.get("channel_id") == subscription_id
            ):
                self._active_channel = {}
-            if hasattr(self, "_subscriptions") and isinstance(self._subscriptions, dict):
+            if hasattr(self, "_subscriptions") and isinstance(
                self._subscriptions, dict
            ):
                self._subscriptions.pop(subscription_id, None)
            return True
@ -722,7 +829,9 @@ class GoogleDriveConnector(BaseConnector):
            except Exception as e:
                selected_ids = set()
                try:
-                    logger.error(f"handle_webhook: scope build failed, proceeding unfiltered: {e}")
+                    logger.error(
                        f"handle_webhook: scope build failed, proceeding unfiltered: {e}"
                    )
                except Exception:
                    pass
@ -759,7 +868,11 @@ class GoogleDriveConnector(BaseConnector):
                    # Filter to our selected scope if we have one; otherwise accept all
                    if selected_ids and (rid not in selected_ids):
                        # Shortcut target might be in scope even if the shortcut isn't
-                        tgt = fobj.get("shortcutDetails", {}).get("targetId") if fobj else None
+                        tgt = (
                            fobj.get("shortcutDetails", {}).get("targetId")
                            if fobj
                            else None
                        )
                        if not (tgt and tgt in selected_ids):
                            continue
@ -808,7 +921,9 @@ class GoogleDriveConnector(BaseConnector):
                blob = self._download_file_bytes(meta)
            except HttpError as e:
                # Skip/record failures
-                logger.error(f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}")
+                logger.error(
                    f"Failed to download {meta.get('name')} ({meta.get('id')}): {e}"
                )
                continue
            from datetime import datetime
@ -838,7 +953,9 @@ class GoogleDriveConnector(BaseConnector):
                    "webViewLink": meta.get("webViewLink"),
                    "parents": meta.get("parents"),
                    "driveId": meta.get("driveId"),
-                    "size": int(meta.get("size", 0)) if str(meta.get("size", "")).isdigit() else None,
+                    "size": int(meta.get("size", 0))
                    if str(meta.get("size", "")).isdigit()
                    else None,
                },
                content=blob,
            )
@ -849,7 +966,9 @@ class GoogleDriveConnector(BaseConnector):
    # -------------------------
    def get_start_page_token(self) -> str:
        # getStartPageToken accepts supportsAllDrives (not includeItemsFromAllDrives)
-        resp = self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
+        resp = (
            self.service.changes().getStartPageToken(**self._drives_get_flags).execute()
        )
        return resp["startPageToken"]
    def poll_changes_and_sync(self) -> Optional[str]:
@ -888,7 +1007,10 @@ class GoogleDriveConnector(BaseConnector):
                # Match scope
                if fid not in selected_ids:
                    # also consider shortcut target
-                    if file_obj.get("mimeType") == "application/vnd.google-apps.shortcut":
+                    if (
                        file_obj.get("mimeType")
                        == "application/vnd.google-apps.shortcut"
                    ):
                        tgt = file_obj.get("shortcutDetails", {}).get("targetId")
                        if tgt and tgt in selected_ids:
                            pass
@ -923,7 +1045,10 @@ class GoogleDriveConnector(BaseConnector):
                    modified_time=parse_datetime(resolved.get("modifiedTime")),
                    mimetype=str(resolved.get("mimeType", "")),
                    acl=DocumentACL(),  # Set appropriate ACL if needed
-                    metadata={"parents": resolved.get("parents"), "driveId": resolved.get("driveId")},
+                    metadata={
                        "parents": resolved.get("parents"),
                        "driveId": resolved.get("driveId"),
                    },
                    content=blob,
                )
                self.emit(doc)
@ -945,7 +1070,9 @@ class GoogleDriveConnector(BaseConnector):
    # -------------------------
    # Optional: webhook stubs
    # -------------------------
-    def build_watch_body(self, webhook_address: str, channel_id: Optional[str] = None) -> Dict[str, Any]:
+    def build_watch_body(
        self, webhook_address: str, channel_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Prepare the request body for changes.watch if you use webhooks.
        """
@ -964,7 +1091,7 @@ class GoogleDriveConnector(BaseConnector):
        body = self.build_watch_body(webhook_address)
        result = (
            self.service.changes()
-            .watch(pageToken=page_token, body=body, **self._drives_flags)
+            .watch(pageToken=page_token, body=body, **self._drives_get_flags)
            .execute()
        )
        return result
@ -974,7 +1101,9 @@ class GoogleDriveConnector(BaseConnector):
        Stop a previously started webhook watch.
        """
        try:
-            self.service.channels().stop(body={"id": channel_id, "resourceId": resource_id}).execute()
+            self.service.channels().stop(
                body={"id": channel_id, "resourceId": resource_id}
            ).execute()
            return True
        except HttpError as e:
--- a/src/connectors/langflow_connector_service.py
+++ b/src/connectors/langflow_connector_service.py
@ -1,5 +1,3 @@
 import os
 import tempfile
 from typing import Any, Dict, List, Optional
 # Create custom processor for connector files using Langflow
@ -60,14 +58,14 @@ class LangflowConnectorService:
        # Create temporary file from document content
        with auto_cleanup_tempfile(suffix=suffix) as tmp_path:
            # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                f.write(document.content)
            # Step 1: Upload file to Langflow
            logger.debug("Uploading file to Langflow", filename=document.filename)
            content = document.content
            file_tuple = (
-                document.filename.replace(" ", "_").replace("/", "_")+suffix,
+                document.filename.replace(" ", "_").replace("/", "_") + suffix,
                content,
                document.mimetype or "application/octet-stream",
            )
@ -256,7 +254,10 @@ class LangflowConnectorService:
        file_ids: List[str],
        jwt_token: str = None,
    ) -> str:
-        """Sync specific files by their IDs using Langflow processing"""
+        """
        Sync specific files by their IDs using Langflow processing.
        Automatically expands folders to their contents.
        """
        if not self.task_service:
            raise ValueError(
                "TaskService not available - connector sync requires task service dependency"
@ -279,10 +280,50 @@ class LangflowConnectorService:
        owner_name = user.name if user else None
        owner_email = user.email if user else None
        # Temporarily set file_ids in the connector's config so list_files() can use them
        # Store the original values to restore later
        cfg = getattr(connector, "cfg", None)
        original_file_ids = None
        original_folder_ids = None
        if cfg is not None:
            original_file_ids = getattr(cfg, "file_ids", None)
            original_folder_ids = getattr(cfg, "folder_ids", None)
        try:
            # Set the file_ids we want to sync in the connector's config
            if cfg is not None:
                cfg.file_ids = file_ids  # type: ignore
                cfg.folder_ids = None  # type: ignore
            # Get the expanded list of file IDs (folders will be expanded to their contents)
            # This uses the connector's list_files() which calls _iter_selected_items()
            result = await connector.list_files()
            expanded_file_ids = [f["id"] for f in result.get("files", [])]
            if not expanded_file_ids:
                logger.warning(
                    f"No files found after expanding file_ids. "
                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
                    f"with no contents, or files that were filtered out."
                )
                # Return empty task rather than failing
                raise ValueError("No files to sync after expanding folders")
        except Exception as e:
            logger.error(f"Failed to expand file_ids via list_files(): {e}")
            # Fallback to original file_ids if expansion fails
            expanded_file_ids = file_ids
        finally:
            # Restore original config values
            if cfg is not None:
                cfg.file_ids = original_file_ids  # type: ignore
                cfg.folder_ids = original_folder_ids  # type: ignore
        processor = LangflowConnectorFileProcessor(
            self,
            connection_id,
-            file_ids,
+            expanded_file_ids,
            user_id,
            jwt_token=jwt_token,
            owner_name=owner_name,
@ -291,7 +332,7 @@ class LangflowConnectorService:
        # Create custom task using TaskService
        task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
        )
        return task_id
--- a/src/connectors/service.py
+++ b/src/connectors/service.py
@ -1,16 +1,11 @@
-import tempfile
+from typing import Any, Dict, List, Optional
 import os
 from typing import Dict, Any, List, Optional
 from .base import BaseConnector, ConnectorDocument
 from utils.logging_config import get_logger
-logger = get_logger(__name__)
+from .base import BaseConnector, ConnectorDocument
 from .google_drive import GoogleDriveConnector
 from .sharepoint import SharePointConnector
 from .onedrive import OneDriveConnector
 from .connection_manager import ConnectionManager
 logger = get_logger(__name__)
@ -56,9 +51,11 @@ class ConnectorService:
        # Create temporary file from document content
        from utils.file_utils import auto_cleanup_tempfile
-        with auto_cleanup_tempfile(suffix=self._get_file_extension(document.mimetype)) as tmp_path:
+        with auto_cleanup_tempfile(
            suffix=self._get_file_extension(document.mimetype)
        ) as tmp_path:
            # Write document content to temp file
-            with open(tmp_path, 'wb') as f:
+            with open(tmp_path, "wb") as f:
                f.write(document.content)
            # Use existing process_file_common function with connector document metadata
@ -71,6 +68,7 @@ class ConnectorService:
            # Process using consolidated processing pipeline
            from models.processors import TaskProcessor
            processor = TaskProcessor(document_service=doc_service)
            result = await processor.process_document_standard(
                file_path=tmp_path,
@ -301,7 +299,10 @@ class ConnectorService:
        file_ids: List[str],
        jwt_token: str = None,
    ) -> str:
-        """Sync specific files by their IDs (used for webhook-triggered syncs)"""
+        """
        Sync specific files by their IDs (used for webhook-triggered syncs or manual selection).
        Automatically expands folders to their contents.
        """
        if not self.task_service:
            raise ValueError(
                "TaskService not available - connector sync requires task service dependency"
@ -324,14 +325,53 @@ class ConnectorService:
        owner_name = user.name if user else None
        owner_email = user.email if user else None
        # Temporarily set file_ids in the connector's config so list_files() can use them
        # Store the original values to restore later
        original_file_ids = None
        original_folder_ids = None
        if hasattr(connector, "cfg"):
            original_file_ids = getattr(connector.cfg, "file_ids", None)
            original_folder_ids = getattr(connector.cfg, "folder_ids", None)
        try:
            # Set the file_ids we want to sync in the connector's config
            if hasattr(connector, "cfg"):
                connector.cfg.file_ids = file_ids  # type: ignore
                connector.cfg.folder_ids = None  # type: ignore
            # Get the expanded list of file IDs (folders will be expanded to their contents)
            # This uses the connector's list_files() which calls _iter_selected_items()
            result = await connector.list_files()
            expanded_file_ids = [f["id"] for f in result.get("files", [])]
            if not expanded_file_ids:
                logger.warning(
                    f"No files found after expanding file_ids. "
                    f"Original IDs: {file_ids}. This may indicate all IDs were folders "
                    f"with no contents, or files that were filtered out."
                )
                # Return empty task rather than failing
                raise ValueError("No files to sync after expanding folders")
        except Exception as e:
            logger.error(f"Failed to expand file_ids via list_files(): {e}")
            # Fallback to original file_ids if expansion fails
            expanded_file_ids = file_ids
        finally:
            # Restore original config values
            if hasattr(connector, "cfg"):
                connector.cfg.file_ids = original_file_ids  # type: ignore
                connector.cfg.folder_ids = original_folder_ids  # type: ignore
        # Create custom processor for specific connector files
        from models.processors import ConnectorFileProcessor
-        # We'll pass file_ids as the files_info, the processor will handle ID-only files
+        # Use expanded_file_ids which has folders already expanded
        processor = ConnectorFileProcessor(
            self,
            connection_id,
-            file_ids,
+            expanded_file_ids,
            user_id,
            jwt_token=jwt_token,
            owner_name=owner_name,
@ -340,7 +380,7 @@ class ConnectorService:
        # Create custom task using TaskService
        task_id = await self.task_service.create_custom_task(
-            user_id, file_ids, processor
+            user_id, expanded_file_ids, processor
        )
        return task_id