class Dropbox(SyncBase):
    """Sync task that pulls documents from Dropbox via ``DropboxConnector``."""

    SOURCE_NAME: str = FileSource.DROPBOX

    async def _generate(self, task: dict):
        """Create the Dropbox connector and return its document generator.

        Args:
            task: Sync task record. ``task["reindex"]`` and
                ``task["poll_range_start"]`` are read; the latter is expected
                to be a datetime-like object because ``.timestamp()`` is
                called on it.

        Returns:
            The generator from ``load_from_state()`` when a full load is
            requested (``reindex == "1"``) or no poll start is recorded,
            otherwise the generator from ``poll_source(start, now)`` where
            ``now`` is the current UTC time.

        Raises:
            KeyError: If ``self.conf`` has no ``"credentials"`` entry or the
                task lacks a key that is read.
        """
        # dict.get() only uses its default when the key is absent. The batch
        # size is optional in the data-source form, so a stored null/empty/0
        # value must fall back to the default as well.
        batch_size = self.conf.get("batch_size") or INDEX_BATCH_SIZE
        self.connector = DropboxConnector(batch_size=batch_size)
        self.connector.load_credentials(self.conf["credentials"])

        # Keep the short-circuit order: "poll_range_start" is only looked up
        # when a reindex was not explicitly requested.
        if task["reindex"] == "1" or not task["poll_range_start"]:
            document_generator = self.connector.load_from_state()
            begin_info = "totally"
        else:
            poll_start = task["poll_range_start"]
            poll_end = datetime.now(timezone.utc)
            document_generator = self.connector.poll_source(
                poll_start.timestamp(), poll_end.timestamp()
            )
            begin_info = f"from {poll_start}"

        logging.info(f"[Dropbox] Connect to Dropbox {begin_info}")
        return document_generator
@@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 '需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。', google_driveSharedFoldersTip: '需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。', + dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。', + dropboxAccessTokenTip: + '请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。', jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。', jiraBaseUrlTip: 'Jira 的 Base URL,例如:https://your-domain.atlassian.net。', diff --git a/web/src/pages/user-setting/data-source/contant.tsx b/web/src/pages/user-setting/data-source/contant.tsx index cc45ad869..a39614177 100644 --- a/web/src/pages/user-setting/data-source/contant.tsx +++ b/web/src/pages/user-setting/data-source/contant.tsx @@ -12,6 +12,7 @@ export enum DataSourceKey { MOODLE = 'moodle', // GMAIL = 'gmail', JIRA = 'jira', + DROPBOX = 'dropbox', // SHAREPOINT = 'sharepoint', // SLACK = 'slack', // TEAMS = 'teams', @@ -53,6 +54,11 @@ export const DataSourceInfo = { description: t(`setting.${DataSourceKey.JIRA}Description`), icon: , }, + [DataSourceKey.DROPBOX]: { + name: 'Dropbox', + description: t(`setting.${DataSourceKey.DROPBOX}Description`), + icon: , + }, }; export const DataSourceFormBaseFields = [ @@ -408,6 +414,22 @@ export const DataSourceFormFields = { tooltip: t('setting.jiraPasswordTip'), }, ], + [DataSourceKey.DROPBOX]: [ + { + label: 'Access Token', + name: 'config.credentials.dropbox_access_token', + type: FormFieldType.Password, + required: true, + tooltip: t('setting.dropboxAccessTokenTip'), + }, + { + label: 'Batch Size', + name: 'config.batch_size', + type: FormFieldType.Number, + required: false, + placeholder: 'Defaults to 2', + }, + ], }; export const DataSourceFormDefaultValues = { @@ -508,4 +530,14 @@ export const DataSourceFormDefaultValues = { }, }, }, + [DataSourceKey.DROPBOX]: { + name: '', + source: DataSourceKey.DROPBOX, + config: { + batch_size: 2, + credentials: { + dropbox_access_token: '', + }, + }, + }, }; diff --git 
a/web/src/pages/user-setting/data-source/index.tsx b/web/src/pages/user-setting/data-source/index.tsx index 2ba7cecd0..6fc3bf9e0 100644 --- a/web/src/pages/user-setting/data-source/index.tsx +++ b/web/src/pages/user-setting/data-source/index.tsx @@ -56,6 +56,12 @@ const dataSourceTemplates = [ description: DataSourceInfo[DataSourceKey.JIRA].description, icon: DataSourceInfo[DataSourceKey.JIRA].icon, }, + { + id: DataSourceKey.DROPBOX, + name: DataSourceInfo[DataSourceKey.DROPBOX].name, + description: DataSourceInfo[DataSourceKey.DROPBOX].description, + icon: DataSourceInfo[DataSourceKey.DROPBOX].icon, + }, ]; const DataSource = () => { const { t } = useTranslation();