From e7e89d3ecbf9638865b15f951549875534b62538 Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Mon, 17 Nov 2025 11:16:34 +0800 Subject: [PATCH 1/5] Doc: style fix (#11295) ### What problem does this PR solve? Style fix based on #11283 ### Type of change - [x] Documentation Update --- deepdoc/parser/mineru_parser.py | 2 +- docs/guides/accessing_admin_ui.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py index bb663de0d..6d3b292d0 100644 --- a/deepdoc/parser/mineru_parser.py +++ b/deepdoc/parser/mineru_parser.py @@ -434,7 +434,7 @@ class MinerUParser(RAGFlowPdfParser): if not section.strip(): section = "FAILED TO PARSE TABLE" case MinerUContentType.IMAGE: - section = "".join(output.get(["image_caption"],[])) + "\n" + "".join(output.get(["image_footnote"],[])) + section = "".join(output.get("image_caption", [])) + "\n" + "".join(output.get("image_footnote", [])) case MinerUContentType.EQUATION: section = output["text"] case MinerUContentType.CODE: diff --git a/docs/guides/accessing_admin_ui.md b/docs/guides/accessing_admin_ui.md index 23521244b..181cff5ac 100644 --- a/docs/guides/accessing_admin_ui.md +++ b/docs/guides/accessing_admin_ui.md @@ -15,7 +15,7 @@ To access the RAGFlow admin UI, append `/admin` to the web UI's address, e.g. `h ### Default Credentials | Username | Password | |----------|----------| -| admin@ragflow.io | admin | +| `admin@ragflow.io` | `admin` | ## Admin UI Overview From 9cef3a26250667fa6761dbe9893a194de0c28aef Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Mon, 17 Nov 2025 11:16:55 +0800 Subject: [PATCH 2/5] Fix: Fixed the issue of not being able to select the time zone in the user center. (#11298) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … user center. ### What problem does this PR solve? Fix: Fixed the issue of not being able to select the time zone in the user center. 
### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/components/ui/modal/modal.tsx | 4 +++ web/src/pages/user-setting/profile/index.tsx | 30 ++++++-------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/web/src/components/ui/modal/modal.tsx b/web/src/components/ui/modal/modal.tsx index acae6c147..af516b1e6 100644 --- a/web/src/components/ui/modal/modal.tsx +++ b/web/src/components/ui/modal/modal.tsx @@ -86,6 +86,9 @@ const Modal: ModalType = ({ onOk?.(); }, [onOk, onOpenChange]); const handleChange = (open: boolean) => { + if (!open && !maskClosable) { + return; + } onOpenChange?.(open); console.log('open', open, onOpenChange); if (open && !disabled) { @@ -185,6 +188,7 @@ const Modal: ModalType = ({ diff --git a/web/src/pages/user-setting/profile/index.tsx b/web/src/pages/user-setting/profile/index.tsx index dceb2cdf3..5c2741cf6 100644 --- a/web/src/pages/user-setting/profile/index.tsx +++ b/web/src/pages/user-setting/profile/index.tsx @@ -13,13 +13,7 @@ import { } from '@/components/ui/form'; import { Input } from '@/components/ui/input'; import { Modal } from '@/components/ui/modal/modal'; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from '@/components/ui/select'; +import { RAGFlowSelect } from '@/components/ui/select'; import { useTranslate } from '@/hooks/common-hooks'; import { TimezoneList } from '@/pages/user-setting/constants'; import { zodResolver } from '@hookform/resolvers/zod'; @@ -230,6 +224,7 @@ const ProfilePage: FC = () => { title={modalTitle[editType]} open={isEditing} showfooter={false} + maskClosable={false} titleClassName="text-base" onOpenChange={(open) => { if (!open) { @@ -281,23 +276,14 @@ const ProfilePage: FC = () => { {t('timezone')} - + />
From 6b64641042342fafe9da6026c6391bc8280fd4f7 Mon Sep 17 00:00:00 2001 From: Scott Davidson <49713135+sd109@users.noreply.github.com> Date: Mon, 17 Nov 2025 06:21:27 +0000 Subject: [PATCH 3/5] Fix: default model base url extraction logic (#11263) ### What problem does this PR solve? Fixes an issue where default models which used the same factory but different base URLs would all be initialised with the default chat model's base URL and would ignore e.g. the embedding model's base URL config. For example, with the following service config, the embedding and reranker models would end up using the base URL for the default chat model (i.e. `llm1.example.com`): ```yaml ragflow: service_conf: user_default_llm: factory: OpenAI-API-Compatible api_key: not-used default_models: chat_model: name: llm1 base_url: https://llm1.example.com/v1 embedding_model: name: llm2 base_url: https://llm2.example.com/v1 rerank_model: name: llm3 base_url: https://llm3.example.com/v1/rerank llm_factories: factory_llm_infos: - name: OpenAI-API-Compatible logo: "" tags: "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION" status: "1" llm: - llm_name: llm1 base_url: 'https://llm1.example.com/v1' api_key: not-used tags: "LLM,CHAT,IMAGE2TEXT" max_tokens: 100000 model_type: chat is_tools: false - llm_name: llm2 base_url: https://llm2.example.com/v1 api_key: not-used tags: "TEXT EMBEDDING" max_tokens: 10000 model_type: embedding - llm_name: llm3 base_url: https://llm3.example.com/v1/rerank api_key: not-used tags: "RERANK,1k" max_tokens: 10000 model_type: rerank ``` ### Type of change - [X] Bug Fix (non-breaking change which fixes an issue) --- api/db/services/llm_service.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index 6ccbf5a94..4d4ccaa57 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -19,6 +19,7 @@ import re from common.token_utils import num_tokens_from_string 
from functools import partial from typing import Generator +from common.constants import LLMType from api.db.db_models import LLM from api.db.services.common_service import CommonService from api.db.services.tenant_llm_service import LLM4Tenant, TenantLLMService @@ -32,6 +33,14 @@ def get_init_tenant_llm(user_id): from common import settings tenant_llm = [] + model_configs = { + LLMType.CHAT: settings.CHAT_CFG, + LLMType.EMBEDDING: settings.EMBEDDING_CFG, + LLMType.SPEECH2TEXT: settings.ASR_CFG, + LLMType.IMAGE2TEXT: settings.IMAGE2TEXT_CFG, + LLMType.RERANK: settings.RERANK_CFG, + } + seen = set() factory_configs = [] for factory_config in [ @@ -54,8 +63,8 @@ def get_init_tenant_llm(user_id): "llm_factory": factory_config["factory"], "llm_name": llm.llm_name, "model_type": llm.model_type, - "api_key": factory_config["api_key"], - "api_base": factory_config["base_url"], + "api_key": model_configs.get(llm.model_type, {}).get("api_key", factory_config["api_key"]), + "api_base": model_configs.get(llm.model_type, {}).get("base_url", factory_config["base_url"]), "max_tokens": llm.max_tokens if llm.max_tokens else 8192, } ) @@ -80,8 +89,8 @@ class LLMBundle(LLM4Tenant): def encode(self, texts: list): if self.langfuse: - generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts}) - + generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts}) + safe_texts = [] for text in texts: token_size = num_tokens_from_string(text) @@ -90,7 +99,7 @@ class LLMBundle(LLM4Tenant): safe_texts.append(text[:target_len]) else: safe_texts.append(text) - + embeddings, used_tokens = self.mdl.encode(safe_texts) llm_name = getattr(self, "llm_name", None) From 0569b50fed95ee702db62e128a1e2820c01ca210 Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Mon, 17 Nov 2025 15:27:19 +0800 Subject: [PATCH 4/5] Fix: create dataset return type inconsistent 
(#11272) ### What problem does this PR solve? Fix the inconsistent return type when creating a dataset (#11167). ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/kb_app.py | 4 ++++ api/db/services/knowledgebase_service.py | 18 +++++++++--------- .../test/test_frontend_api/test_dataset.py | 4 ++-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index b7cf58a20..f173b56a0 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -55,6 +55,10 @@ def create(): **req ) + code = req.get("code") + if code: + return get_data_error_result(code=code, message=req.get("message")) + try: if not KnowledgebaseService.save(**req): return get_data_error_result() diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index ca30ca074..28936576d 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -24,9 +24,9 @@ from common.time_utils import current_timestamp, datetime_format from api.db.services import duplicate_name from api.db.services.user_service import TenantService from common.misc_utils import get_uuid -from common.constants import StatusEnum +from common.constants import StatusEnum, RetCode from api.constants import DATASET_NAME_LIMIT -from api.utils.api_utils import get_parser_config, get_data_error_result +from api.utils.api_utils import get_parser_config class KnowledgebaseService(CommonService): """Service class for managing knowledge base operations. 
@@ -391,12 +391,12 @@ class KnowledgebaseService(CommonService): """ # Validate name if not isinstance(name, str): - return get_data_error_result(message="Dataset name must be string.") + return {"code": RetCode.DATA_ERROR, "message": "Dataset name must be string."} dataset_name = name.strip() - if dataset_name == "": - return get_data_error_result(message="Dataset name can't be empty.") + if len(dataset_name) == 0: + return {"code": RetCode.DATA_ERROR, "message": "Dataset name can't be empty."} if len(dataset_name.encode("utf-8")) > DATASET_NAME_LIMIT: - return get_data_error_result(message=f"Dataset name length is {len(dataset_name)} which is larger than {DATASET_NAME_LIMIT}") + return {"code": RetCode.DATA_ERROR, "message": f"Dataset name length is {len(dataset_name)} which is larger than {DATASET_NAME_LIMIT}"} # Deduplicate name within tenant dataset_name = duplicate_name( @@ -409,7 +409,7 @@ class KnowledgebaseService(CommonService): # Verify tenant exists ok, _t = TenantService.get_by_id(tenant_id) if not ok: - return False, "Tenant not found." + return {"code": RetCode.DATA_ERROR, "message": "Tenant does not exist."} # Build payload kb_id = get_uuid() @@ -419,10 +419,10 @@ class KnowledgebaseService(CommonService): "tenant_id": tenant_id, "created_by": tenant_id, "parser_id": (parser_id or "naive"), - **kwargs + **kwargs # Includes optional fields such as description, language, permission, avatar, parser_config, etc. 
} - # Default parser_config (align with kb_app.create) — do not accept external overrides + # Update parser_config (always override with validated default/merged config) payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config")) return payload diff --git a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py index b57a3543b..3fb3e93ff 100644 --- a/sdk/python/test/test_frontend_api/test_dataset.py +++ b/sdk/python/test/test_frontend_api/test_dataset.py @@ -104,7 +104,7 @@ def test_invalid_name_dataset(get_auth): assert res['code'] == 100 res = create_dataset(get_auth, "") - assert res['code'] == 100 + assert res['code'] == 102 long_string = "" @@ -112,7 +112,7 @@ def test_invalid_name_dataset(get_auth): long_string += random.choice(string.ascii_letters + string.digits) res = create_dataset(get_auth, long_string) - assert res['code'] == 100 + assert res['code'] == 102 print(res) From bd4bc57009fe2990b3be1000564a4d5559477cfc Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Mon, 17 Nov 2025 15:34:17 +0800 Subject: [PATCH 5/5] Refactor: move mcp connection utilities to common (#11304) ### What problem does this PR solve? 
Move the MCP tool-call connection utilities (`mcp_tool_call_conn.py`) from `rag/utils` to `common`, relocate the `ToolCallSession` protocol from `rag/llm/chat_model.py` into that module, and update the affected imports. ### Type of change - [x] Refactoring --------- Signed-off-by: Jin Hai --- agent/component/agent_with_tools.py | 2 +- agent/tools/base.py | 3 +-- api/apps/mcp_server_app.py | 2 +- api/apps/sdk/agents.py | 4 ++-- api/ragflow_server.py | 2 +- api/utils/api_utils.py | 2 +- common/data_source/interfaces.py | 4 ++-- {rag/utils => common}/mcp_tool_call_conn.py | 18 ++++++++++++------ rag/llm/chat_model.py | 5 ----- rag/nlp/__init__.py | 12 ++++++------ 10 files changed, 27 insertions(+), 27 deletions(-) rename {rag/utils => common}/mcp_tool_call_conn.py (94%) diff --git a/agent/component/agent_with_tools.py b/agent/component/agent_with_tools.py index a27504139..906a9eca3 100644 --- a/agent/component/agent_with_tools.py +++ b/agent/component/agent_with_tools.py @@ -30,7 +30,7 @@ from api.db.services.mcp_server_service import MCPServerService from common.connection_utils import timeout from rag.prompts.generator import next_step, COMPLETE_TASK, analyze_task, \ citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question, message_fit_in -from rag.utils.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool +from common.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool from agent.component.llm import LLMParam, LLM diff --git a/agent/tools/base.py b/agent/tools/base.py index a3d569694..791242d59 100644 --- a/agent/tools/base.py +++ b/agent/tools/base.py @@ -21,9 +21,8 @@ from functools import partial from typing import TypedDict, List, Any from agent.component.base import ComponentParamBase, ComponentBase from common.misc_utils import hash_str2int -from rag.llm.chat_model import ToolCallSession from rag.prompts.generator import kb_prompt -from rag.utils.mcp_tool_call_conn import MCPToolCallSession +from common.mcp_tool_call_conn import MCPToolCallSession, ToolCallSession from timeit import default_timer as timer diff --git a/api/apps/mcp_server_app.py b/api/apps/mcp_server_app.py index 
66d447491..a8ac2aef1 100644 --- a/api/apps/mcp_server_app.py +++ b/api/apps/mcp_server_app.py @@ -25,7 +25,7 @@ from common.misc_utils import get_uuid from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, validate_request, \ get_mcp_tools from api.utils.web_utils import get_float, safe_json_parse -from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions +from common.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions @manager.route("/list", methods=["POST"]) # noqa: F821 diff --git a/api/apps/sdk/agents.py b/api/apps/sdk/agents.py index 208b7a1be..14ea97fb6 100644 --- a/api/apps/sdk/agents.py +++ b/api/apps/sdk/agents.py @@ -41,12 +41,12 @@ def list_agents(tenant_id): return get_error_data_result("The agent doesn't exist.") page_number = int(request.args.get("page", 1)) items_per_page = int(request.args.get("page_size", 30)) - orderby = request.args.get("orderby", "update_time") + order_by = request.args.get("orderby", "update_time") if request.args.get("desc") == "False" or request.args.get("desc") == "false": desc = False else: desc = True - canvas = UserCanvasService.get_list(tenant_id, page_number, items_per_page, orderby, desc, id, title) + canvas = UserCanvasService.get_list(tenant_id, page_number, items_per_page, order_by, desc, id, title) return get_result(data=canvas) diff --git a/api/ragflow_server.py b/api/ragflow_server.py index 868e054ae..c340255e7 100644 --- a/api/ragflow_server.py +++ b/api/ragflow_server.py @@ -41,7 +41,7 @@ from api.db.db_models import init_database_tables as init_web_db from api.db.init_data import init_web_data from common.versions import get_ragflow_version from common.config_utils import show_configs -from rag.utils.mcp_tool_call_conn import shutdown_all_mcp_sessions +from common.mcp_tool_call_conn import shutdown_all_mcp_sessions from rag.utils.redis_conn import RedisDistributedLock stop_event = threading.Event() 
diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py index 4cace9eca..1bd3f3e3c 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -37,7 +37,7 @@ from peewee import OperationalError from common.constants import ActiveEnum from api.db.db_models import APIToken from api.utils.json_encode import CustomJSONEncoder -from rag.utils.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions +from common.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions from api.db.services.tenant_llm_service import LLMFactoriesService from common.connection_utils import timeout from common.constants import RetCode diff --git a/common/data_source/interfaces.py b/common/data_source/interfaces.py index 9c5f00141..5e5d3aa2e 100644 --- a/common/data_source/interfaces.py +++ b/common/data_source/interfaces.py @@ -69,7 +69,7 @@ class SlimConnectorWithPermSync(ABC): class CheckpointedConnectorWithPermSync(ABC): - """Checkpointed connector interface (with permission sync)""" + """Checkpoint connector interface (with permission sync)""" @abstractmethod def load_from_checkpoint( @@ -143,7 +143,7 @@ class CredentialsProviderInterface(abc.ABC, Generic[T]): @abc.abstractmethod def is_dynamic(self) -> bool: - """If dynamic, the credentials may change during usage ... maening the client + """If dynamic, the credentials may change during usage ... meaning the client needs to use the locking features of the credentials provider to operate correctly. 
diff --git a/rag/utils/mcp_tool_call_conn.py b/common/mcp_tool_call_conn.py similarity index 94% rename from rag/utils/mcp_tool_call_conn.py rename to common/mcp_tool_call_conn.py index 2093f7bc8..b19f063e1 100644 --- a/rag/utils/mcp_tool_call_conn.py +++ b/common/mcp_tool_call_conn.py @@ -21,7 +21,7 @@ import weakref from concurrent.futures import ThreadPoolExecutor from concurrent.futures import TimeoutError as FuturesTimeoutError from string import Template -from typing import Any, Literal +from typing import Any, Literal, Protocol from typing_extensions import override @@ -30,12 +30,15 @@ from mcp.client.session import ClientSession from mcp.client.sse import sse_client from mcp.client.streamable_http import streamablehttp_client from mcp.types import CallToolResult, ListToolsResult, TextContent, Tool -from rag.llm.chat_model import ToolCallSession MCPTaskType = Literal["list_tools", "tool_call"] MCPTask = tuple[MCPTaskType, dict[str, Any], asyncio.Queue[Any]] +class ToolCallSession(Protocol): + def tool_call(self, name: str, arguments: dict[str, Any]) -> str: ... 
+ + class MCPToolCallSession(ToolCallSession): _ALL_INSTANCES: weakref.WeakSet["MCPToolCallSession"] = weakref.WeakSet() @@ -106,7 +109,8 @@ class MCPToolCallSession(ToolCallSession): await self._process_mcp_tasks(None, msg) else: - await self._process_mcp_tasks(None, f"Unsupported MCP server type: {self._mcp_server.server_type}, id: {self._mcp_server.id}") + await self._process_mcp_tasks(None, + f"Unsupported MCP server type: {self._mcp_server.server_type}, id: {self._mcp_server.id}") async def _process_mcp_tasks(self, client_session: ClientSession | None, error_message: str | None = None) -> None: while not self._close: @@ -164,7 +168,8 @@ class MCPToolCallSession(ToolCallSession): raise async def _call_mcp_tool(self, name: str, arguments: dict[str, Any], timeout: float | int = 10) -> str: - result: CallToolResult = await self._call_mcp_server("tool_call", name=name, arguments=arguments, timeout=timeout) + result: CallToolResult = await self._call_mcp_server("tool_call", name=name, arguments=arguments, + timeout=timeout) if result.isError: return f"MCP server error: {result.content}" @@ -283,7 +288,8 @@ def close_multiple_mcp_toolcall_sessions(sessions: list[MCPToolCallSession]) -> except Exception: logging.exception("Exception during MCP session cleanup thread management") - logging.info(f"{len(sessions)} MCP sessions has been cleaned up. {len(list(MCPToolCallSession._ALL_INSTANCES))} in global context.") + logging.info( + f"{len(sessions)} MCP sessions has been cleaned up. 
{len(list(MCPToolCallSession._ALL_INSTANCES))} in global context.") def shutdown_all_mcp_sessions(): @@ -298,7 +304,7 @@ def shutdown_all_mcp_sessions(): logging.info("All MCPToolCallSession instances have been closed.") -def mcp_tool_metadata_to_openai_tool(mcp_tool: Tool|dict) -> dict[str, Any]: +def mcp_tool_metadata_to_openai_tool(mcp_tool: Tool | dict) -> dict[str, Any]: if isinstance(mcp_tool, dict): return { "type": "function", diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 17ddbc138..c9e3b29f7 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -22,7 +22,6 @@ import re import time from abc import ABC from copy import deepcopy -from typing import Any, Protocol from urllib.parse import urljoin import json_repair @@ -65,10 +64,6 @@ LENGTH_NOTIFICATION_CN = "······\n由于大模型的上下文窗口大小 LENGTH_NOTIFICATION_EN = "...\nThe answer is truncated by your chosen LLM due to its limitation on context length." -class ToolCallSession(Protocol): - def tool_call(self, name: str, arguments: dict[str, Any]) -> str: ... - - class Base(ABC): def __init__(self, key, model_name, base_url, **kwargs): timeout = int(os.environ.get("LM_TIMEOUT_SECONDS", 600)) diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 80acf1d8f..de7c2ce60 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -155,13 +155,13 @@ def qbullets_category(sections): if re.match(pro, sec) and not not_bullet(sec): hits[i] += 1 break - maxium = 0 + maximum = 0 res = -1 for i, h in enumerate(hits): - if h <= maxium: + if h <= maximum: continue res = i - maxium = h + maximum = h return res, QUESTION_PATTERN[res] @@ -222,13 +222,13 @@ def bullets_category(sections): if re.match(p, sec) and not not_bullet(sec): hits[i] += 1 break - maxium = 0 + maximum = 0 res = -1 for i, h in enumerate(hits): - if h <= maxium: + if h <= maximum: continue res = i - maxium = h + maximum = h return res