From 4d8698624c9bb65ff590685ff4c2c5a55cc6f48d Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:38:04 +0800 Subject: [PATCH 1/3] Docs: Updated use_kg and toc_enhance switch descriptions (#11485) ### What problem does this PR solve? ### Type of change - [x] Documentation Update --- docs/references/http_api_reference.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index 253745432..3c73cf58c 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -2122,9 +2122,9 @@ curl --request POST \ - `"top_k"`: (*Body parameter*), `integer` The number of chunks engaged in vector cosine computation. Defaults to `1024`. - `"use_kg"`: (*Body parameter*), `boolean` - The search includes text chunks related to the knowledge graph of the selected dataset to handle complex multi-hop queries. Defaults to `False`. + Whether to search chunks related to the generated knowledge graph for multi-hop queries. Defaults to `False`. Before enabling this, ensure you have successfully constructed a knowledge graph for the specified datasets. See [here](https://ragflow.io/docs/dev/construct_knowledge_graph) for details. - `"toc_enhance"`: (*Body parameter*), `boolean` - The search includes table of content enhancement in order to boost rank of relevant chunks. Files parsed with `TOC Enhance` enabled is prerequisite. Defaults to `False`. + Whether to search chunks with extracted table of contents. Defaults to `False`. Before enabling this, ensure you have enabled `TOC_Enhance` and successfully extracted table of contents for the specified datasets. See [here](https://ragflow.io/docs/dev/enable_table_of_contents) for details. - `"rerank_id"`: (*Body parameter*), `integer` The ID of the rerank model. 
- `"keyword"`: (*Body parameter*), `boolean` @@ -2140,8 +2140,8 @@ curl --request POST \ - `"metadata_condition"`: (*Body parameter*), `object` The metadata condition used for filtering chunks: - `"logic"`: (*Body parameter*), `string` - - `"and"` Intersection of the result from each condition (default). - - `"or"` union of the result from each condition. + - `"and"`: Return only results that satisfy *every* condition (default). + - `"or"`: Return results that satisfy *any* condition. - `"conditions"`: (*Body parameter*), `array` A list of metadata filter conditions. - `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details. From d5f85482005ce5d798609ef228a782e5361c24cb Mon Sep 17 00:00:00 2001 From: zhipeng <5310853+Ox0400@users.noreply.github.com> Date: Mon, 24 Nov 2025 19:02:08 +0800 Subject: [PATCH 2/3] Allow creating a superuser when starting the RAG server. (#10634) ### What problem does this PR solve? Adds a new option to the RAG server scripts that creates the superuser when the server starts. 
### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Co-authored-by: Zhichang Yu Co-authored-by: Jin Hai --- api/db/init_data.py | 15 +++++++++------ api/ragflow_server.py | 7 ++++++- docker/entrypoint.sh | 9 ++++++++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/api/db/init_data.py b/api/db/init_data.py index 4a9ad067a..d4873d332 100644 --- a/api/db/init_data.py +++ b/api/db/init_data.py @@ -34,14 +34,17 @@ from common.file_utils import get_project_base_directory from common import settings from api.common.base64 import encode_to_base64 +DEFAULT_SUPERUSER_NICKNAME = os.getenv("DEFAULT_SUPERUSER_NICKNAME", "admin") +DEFAULT_SUPERUSER_EMAIL = os.getenv("DEFAULT_SUPERUSER_EMAIL", "admin@ragflow.io") +DEFAULT_SUPERUSER_PASSWORD = os.getenv("DEFAULT_SUPERUSER_PASSWORD", "admin") -def init_superuser(): +def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_EMAIL, password=DEFAULT_SUPERUSER_PASSWORD, role=UserTenantRole.OWNER): user_info = { "id": uuid.uuid1().hex, - "password": encode_to_base64("admin"), - "nickname": "admin", + "password": encode_to_base64(password), + "nickname": nickname, "is_superuser": True, - "email": "admin@ragflow.io", + "email": email, "creator": "system", "status": "1", } @@ -58,7 +61,7 @@ def init_superuser(): "tenant_id": user_info["id"], "user_id": user_info["id"], "invited_by": user_info["id"], - "role": UserTenantRole.OWNER + "role": role } tenant_llm = get_init_tenant_llm(user_info["id"]) @@ -70,7 +73,7 @@ def init_superuser(): UserTenantService.insert(**usr_tenant) TenantLLMService.insert_many(tenant_llm) logging.info( - "Super user initialized. email: admin@ragflow.io, password: admin. Changing the password after login is strongly recommended.") + f"Super user initialized. email: {email}, password: {password}. 
Changing the password after login is strongly recommended.") chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"]) msg = chat_mdl.chat(system="", history=[ diff --git a/api/ragflow_server.py b/api/ragflow_server.py index 852372d0b..a2d9d6a6e 100644 --- a/api/ragflow_server.py +++ b/api/ragflow_server.py @@ -37,7 +37,7 @@ from api.db.services.document_service import DocumentService from common.file_utils import get_project_base_directory from common import settings from api.db.db_models import init_database_tables as init_web_db -from api.db.init_data import init_web_data +from api.db.init_data import init_web_data, init_superuser from common.versions import get_ragflow_version from common.config_utils import show_configs from common.mcp_tool_call_conn import shutdown_all_mcp_sessions @@ -109,11 +109,16 @@ if __name__ == '__main__': parser.add_argument( "--debug", default=False, help="debug mode", action="store_true" ) + parser.add_argument( + "--init-superuser", default=False, help="init superuser", action="store_true" + ) args = parser.parse_args() if args.version: print(get_ragflow_version()) sys.exit(0) + if args.init_superuser: + init_superuser() RuntimeConfig.DEBUG = args.debug if RuntimeConfig.DEBUG: logging.info("run on debug mode") diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index cfde08d0c..a5942c5b8 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -13,6 +13,7 @@ function usage() { echo " --disable-datasync Disables synchronization of datasource workers." echo " --enable-mcpserver Enables the MCP server." echo " --enable-adminserver Enables the Admin server." + echo " --init-superuser Initializes the superuser." echo " --consumer-no-beg= Start range for consumers (if using range-based)." echo " --consumer-no-end= End range for consumers (if using range-based)." echo " --workers= Number of task executors to run (if range is not used)." 
@@ -24,6 +25,7 @@ function usage() { echo " $0 --disable-webserver --workers=2 --host-id=myhost123" echo " $0 --enable-mcpserver" echo " $0 --enable-adminserver" + echo " $0 --init-superuser" exit 1 } @@ -32,6 +34,7 @@ ENABLE_TASKEXECUTOR=1 # Default to enable task executor ENABLE_DATASYNC=1 ENABLE_MCP_SERVER=0 ENABLE_ADMIN_SERVER=0 # Default close admin server +INIT_SUPERUSER_ARGS="" # Default to not initialize superuser CONSUMER_NO_BEG=0 CONSUMER_NO_END=0 WORKERS=1 @@ -83,6 +86,10 @@ for arg in "$@"; do ENABLE_ADMIN_SERVER=1 shift ;; + --init-superuser) + INIT_SUPERUSER_ARGS="--init-superuser" + shift + ;; --mcp-host=*) MCP_HOST="${arg#*=}" shift @@ -240,7 +247,7 @@ if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then echo "Starting ragflow_server..." while true; do - "$PY" api/ragflow_server.py & + "$PY" api/ragflow_server.py ${INIT_SUPERUSER_ARGS} & wait; sleep 1; done & From d1744aaaf3b7ba7f9adf7a8da58e42ef9b9f8558 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Tue, 25 Nov 2025 09:40:03 +0800 Subject: [PATCH 3/3] Feat: add datasource Dropbox (#11488) ### What problem does this PR solve? Add datasource Dropbox. 
### Type of change - [x] New Feature (non-breaking change which adds functionality) --- common/constants.py | 1 + common/data_source/config.py | 1 + common/data_source/dropbox_connector.py | 160 ++++++++++++++---- rag/svr/sync_data_source.py | 26 ++- web/src/assets/svg/data-source/dropbox.svg | 1 + web/src/locales/en.ts | 4 + web/src/locales/zh.ts | 3 + .../user-setting/data-source/contant.tsx | 32 ++++ .../pages/user-setting/data-source/index.tsx | 6 + 9 files changed, 197 insertions(+), 37 deletions(-) create mode 100644 web/src/assets/svg/data-source/dropbox.svg diff --git a/common/constants.py b/common/constants.py index 1c3404786..d9e75f66a 100644 --- a/common/constants.py +++ b/common/constants.py @@ -119,6 +119,7 @@ class FileSource(StrEnum): SLACK = "slack" TEAMS = "teams" MOODLE = "moodle" + DROPBOX = "dropbox" class PipelineTaskType(StrEnum): diff --git a/common/data_source/config.py b/common/data_source/config.py index 0c038c6d7..751d1f33c 100644 --- a/common/data_source/config.py +++ b/common/data_source/config.py @@ -50,6 +50,7 @@ class DocumentSource(str, Enum): DISCORD = "discord" MOODLE = "moodle" S3_COMPATIBLE = "s3_compatible" + DROPBOX = "dropbox" class FileOrigin(str, Enum): diff --git a/common/data_source/dropbox_connector.py b/common/data_source/dropbox_connector.py index fd349baa1..0a0a3c2de 100644 --- a/common/data_source/dropbox_connector.py +++ b/common/data_source/dropbox_connector.py @@ -1,13 +1,24 @@ """Dropbox connector""" +import logging +from datetime import timezone from typing import Any from dropbox import Dropbox from dropbox.exceptions import ApiError, AuthError +from dropbox.files import FileMetadata, FolderMetadata -from common.data_source.config import INDEX_BATCH_SIZE -from common.data_source.exceptions import ConnectorValidationError, InsufficientPermissionsError, ConnectorMissingCredentialError +from common.data_source.config import INDEX_BATCH_SIZE, DocumentSource +from common.data_source.exceptions import ( + 
ConnectorMissingCredentialError, + ConnectorValidationError, + InsufficientPermissionsError, +) from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch +from common.data_source.models import Document, GenerateDocumentsOutput +from common.data_source.utils import get_file_ext + +logger = logging.getLogger(__name__) class DropboxConnector(LoadConnector, PollConnector): @@ -19,29 +30,29 @@ class DropboxConnector(LoadConnector, PollConnector): def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: """Load Dropbox credentials""" - try: - access_token = credentials.get("dropbox_access_token") - if not access_token: - raise ConnectorMissingCredentialError("Dropbox access token is required") - - self.dropbox_client = Dropbox(access_token) - return None - except Exception as e: - raise ConnectorMissingCredentialError(f"Dropbox: {e}") + access_token = credentials.get("dropbox_access_token") + if not access_token: + raise ConnectorMissingCredentialError("Dropbox access token is required") + + self.dropbox_client = Dropbox(access_token) + return None def validate_connector_settings(self) -> None: """Validate Dropbox connector settings""" - if not self.dropbox_client: + if self.dropbox_client is None: raise ConnectorMissingCredentialError("Dropbox") - + try: - # Test connection by getting current account info - self.dropbox_client.users_get_current_account() - except (AuthError, ApiError) as e: - if "invalid_access_token" in str(e).lower(): - raise InsufficientPermissionsError("Invalid Dropbox access token") - else: - raise ConnectorValidationError(f"Dropbox validation error: {e}") + self.dropbox_client.files_list_folder(path="", limit=1) + except AuthError as e: + logger.exception("[Dropbox]: Failed to validate Dropbox credentials") + raise ConnectorValidationError(f"Dropbox credential is invalid: {e}") + except ApiError as e: + if e.error is not None and "insufficient_permissions" in str(e.error).lower(): + 
raise InsufficientPermissionsError("Your Dropbox token does not have sufficient permissions.") + raise ConnectorValidationError(f"Unexpected Dropbox error during validation: {e.user_message_text or e}") + except Exception as e: + raise ConnectorValidationError(f"Unexpected error during Dropbox settings validation: {e}") def _download_file(self, path: str) -> bytes: """Download a single file from Dropbox.""" @@ -54,26 +65,105 @@ class DropboxConnector(LoadConnector, PollConnector): """Create a shared link for a file in Dropbox.""" if self.dropbox_client is None: raise ConnectorMissingCredentialError("Dropbox") - + try: - # Try to get existing shared links first shared_links = self.dropbox_client.sharing_list_shared_links(path=path) if shared_links.links: return shared_links.links[0].url - - # Create a new shared link - link_settings = self.dropbox_client.sharing_create_shared_link_with_settings(path) - return link_settings.url - except Exception: - # Fallback to basic link format - return f"https://www.dropbox.com/home{path}" - def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any: + link_metadata = self.dropbox_client.sharing_create_shared_link_with_settings(path) + return link_metadata.url + except ApiError as err: + logger.exception(f"[Dropbox]: Failed to create a shared link for {path}: {err}") + return "" + + def _yield_files_recursive( + self, + path: str, + start: SecondsSinceUnixEpoch | None, + end: SecondsSinceUnixEpoch | None, + ) -> GenerateDocumentsOutput: + """Yield files in batches from a specified Dropbox folder, including subfolders.""" + if self.dropbox_client is None: + raise ConnectorMissingCredentialError("Dropbox") + + result = self.dropbox_client.files_list_folder( + path, + limit=self.batch_size, + recursive=False, + include_non_downloadable_files=False, + ) + + while True: + batch: list[Document] = [] + for entry in result.entries: + if isinstance(entry, FileMetadata): + modified_time = entry.client_modified + 
if modified_time.tzinfo is None: + modified_time = modified_time.replace(tzinfo=timezone.utc) + else: + modified_time = modified_time.astimezone(timezone.utc) + + time_as_seconds = modified_time.timestamp() + if start is not None and time_as_seconds <= start: + continue + if end is not None and time_as_seconds > end: + continue + + try: + downloaded_file = self._download_file(entry.path_display) + except Exception: + logger.exception(f"[Dropbox]: Error downloading file {entry.path_display}") + continue + + batch.append( + Document( + id=f"dropbox:{entry.id}", + blob=downloaded_file, + source=DocumentSource.DROPBOX, + semantic_identifier=entry.name, + extension=get_file_ext(entry.name), + doc_updated_at=modified_time, + size_bytes=entry.size if getattr(entry, "size", None) is not None else len(downloaded_file), + ) + ) + + elif isinstance(entry, FolderMetadata): + yield from self._yield_files_recursive(entry.path_lower, start, end) + + if batch: + yield batch + + if not result.has_more: + break + + result = self.dropbox_client.files_list_folder_continue(result.cursor) + + def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput: """Poll Dropbox for recent file changes""" - # Simplified implementation - in production this would handle actual polling - return [] + if self.dropbox_client is None: + raise ConnectorMissingCredentialError("Dropbox") - def load_from_state(self) -> Any: + for batch in self._yield_files_recursive("", start, end): + yield batch + + def load_from_state(self) -> GenerateDocumentsOutput: """Load files from Dropbox state""" - # Simplified implementation - return [] \ No newline at end of file + return self._yield_files_recursive("", None, None) + + +if __name__ == "__main__": + import os + + logging.basicConfig(level=logging.DEBUG) + connector = DropboxConnector() + connector.load_credentials({"dropbox_access_token": os.environ.get("DROPBOX_ACCESS_TOKEN")}) + 
connector.validate_connector_settings() + document_batches = connector.load_from_state() + try: + first_batch = next(document_batches) + print(f"Loaded {len(first_batch)} documents in first batch.") + for doc in first_batch: + print(f"- {doc.semantic_identifier} ({doc.size_bytes} bytes)") + except StopIteration: + print("No documents available in Dropbox.") diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index b29ad15de..bc9412205 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -37,7 +37,7 @@ from api.db.services.connector_service import ConnectorService, SyncLogsService from api.db.services.knowledgebase_service import KnowledgebaseService from common import settings from common.config_utils import show_configs -from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector, MoodleConnector, JiraConnector +from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector, MoodleConnector, JiraConnector, DropboxConnector from common.constants import FileSource, TaskStatus from common.data_source.config import INDEX_BATCH_SIZE from common.data_source.confluence_connector import ConfluenceConnector @@ -211,6 +211,27 @@ class Gmail(SyncBase): pass +class Dropbox(SyncBase): + SOURCE_NAME: str = FileSource.DROPBOX + + async def _generate(self, task: dict): + self.connector = DropboxConnector(batch_size=self.conf.get("batch_size", INDEX_BATCH_SIZE)) + self.connector.load_credentials(self.conf["credentials"]) + + if task["reindex"] == "1" or not task["poll_range_start"]: + document_generator = self.connector.load_from_state() + begin_info = "totally" + else: + poll_start = task["poll_range_start"] + document_generator = self.connector.poll_source( + poll_start.timestamp(), datetime.now(timezone.utc).timestamp() + ) + begin_info = f"from {poll_start}" + + logging.info(f"[Dropbox] Connect to Dropbox {begin_info}") + return 
document_generator + + class GoogleDrive(SyncBase): SOURCE_NAME: str = FileSource.GOOGLE_DRIVE @@ -454,7 +475,8 @@ func_factory = { FileSource.SHAREPOINT: SharePoint, FileSource.SLACK: Slack, FileSource.TEAMS: Teams, - FileSource.MOODLE: Moodle + FileSource.MOODLE: Moodle, + FileSource.DROPBOX: Dropbox, } diff --git a/web/src/assets/svg/data-source/dropbox.svg b/web/src/assets/svg/data-source/dropbox.svg new file mode 100644 index 000000000..2890b48af --- /dev/null +++ b/web/src/assets/svg/data-source/dropbox.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 44eff8144..233a0d1fc 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -742,6 +742,10 @@ Example: https://fsn1.your-objectstorage.com`, 'Comma-separated emails whose "My Drive" contents should be indexed (include the primary admin).', google_driveSharedFoldersTip: 'Comma-separated Google Drive folder links to crawl.', + dropboxDescription: + 'Connect your Dropbox to sync files and folders from a chosen account.', + dropboxAccessTokenTip: + 'Generate a long-lived access token in the Dropbox App Console with files.metadata.read, files.content.read, and sharing.read scopes.', moodleDescription: 'Connect to your Moodle LMS to sync course content, forums, and resources.', moodleUrlTip: diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index d2f4b1d16..46db2c2c3 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -722,6 +722,9 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 '需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。', google_driveSharedFoldersTip: '需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。', + dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。', + dropboxAccessTokenTip: + '请在 Dropbox App Console 生成 Access Token,并勾选 files.metadata.read、files.content.read、sharing.read 等必要权限。', jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。', jiraBaseUrlTip: 'Jira 的 Base URL,例如:https://your-domain.atlassian.net。', diff --git 
a/web/src/pages/user-setting/data-source/contant.tsx b/web/src/pages/user-setting/data-source/contant.tsx index cc45ad869..a39614177 100644 --- a/web/src/pages/user-setting/data-source/contant.tsx +++ b/web/src/pages/user-setting/data-source/contant.tsx @@ -12,6 +12,7 @@ export enum DataSourceKey { MOODLE = 'moodle', // GMAIL = 'gmail', JIRA = 'jira', + DROPBOX = 'dropbox', // SHAREPOINT = 'sharepoint', // SLACK = 'slack', // TEAMS = 'teams', @@ -53,6 +54,11 @@ export const DataSourceInfo = { description: t(`setting.${DataSourceKey.JIRA}Description`), icon: , }, + [DataSourceKey.DROPBOX]: { + name: 'Dropbox', + description: t(`setting.${DataSourceKey.DROPBOX}Description`), + icon: , + }, }; export const DataSourceFormBaseFields = [ @@ -408,6 +414,22 @@ export const DataSourceFormFields = { tooltip: t('setting.jiraPasswordTip'), }, ], + [DataSourceKey.DROPBOX]: [ + { + label: 'Access Token', + name: 'config.credentials.dropbox_access_token', + type: FormFieldType.Password, + required: true, + tooltip: t('setting.dropboxAccessTokenTip'), + }, + { + label: 'Batch Size', + name: 'config.batch_size', + type: FormFieldType.Number, + required: false, + placeholder: 'Defaults to 2', + }, + ], }; export const DataSourceFormDefaultValues = { @@ -508,4 +530,14 @@ export const DataSourceFormDefaultValues = { }, }, }, + [DataSourceKey.DROPBOX]: { + name: '', + source: DataSourceKey.DROPBOX, + config: { + batch_size: 2, + credentials: { + dropbox_access_token: '', + }, + }, + }, }; diff --git a/web/src/pages/user-setting/data-source/index.tsx b/web/src/pages/user-setting/data-source/index.tsx index 2ba7cecd0..6fc3bf9e0 100644 --- a/web/src/pages/user-setting/data-source/index.tsx +++ b/web/src/pages/user-setting/data-source/index.tsx @@ -56,6 +56,12 @@ const dataSourceTemplates = [ description: DataSourceInfo[DataSourceKey.JIRA].description, icon: DataSourceInfo[DataSourceKey.JIRA].icon, }, + { + id: DataSourceKey.DROPBOX, + name: DataSourceInfo[DataSourceKey.DROPBOX].name, + 
description: DataSourceInfo[DataSourceKey.DROPBOX].description, + icon: DataSourceInfo[DataSourceKey.DROPBOX].icon, + }, ]; const DataSource = () => { const { t } = useTranslation();