From 5c9672a2656084c99b4dd1e5820453cbe338dc8d Mon Sep 17 00:00:00 2001 From: lin Date: Tue, 9 Dec 2025 09:32:42 +0800 Subject: [PATCH 1/8] Fix: advanced_ingestion_pipeline (#11816) ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Co-authored-by: kche0169 --- agent/templates/advanced_ingestion_pipeline.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agent/templates/advanced_ingestion_pipeline.json b/agent/templates/advanced_ingestion_pipeline.json index cfd211f46..2e996e248 100644 --- a/agent/templates/advanced_ingestion_pipeline.json +++ b/agent/templates/advanced_ingestion_pipeline.json @@ -193,7 +193,7 @@ "presence_penalty": 0.4, "prompts": [ { - "content": "Text Content:\n{Splitter:KindDingosJam@chunks}\n", + "content": "Text Content:\n{Splitter:NineTiesSin@chunks}\n", "role": "user" } ], @@ -226,7 +226,7 @@ "presence_penalty": 0.4, "prompts": [ { - "content": "Text Content:\n\n{Splitter:KindDingosJam@chunks}\n", + "content": "Text Content:\n\n{Splitter:TastyPointsLay@chunks}\n", "role": "user" } ], @@ -259,7 +259,7 @@ "presence_penalty": 0.4, "prompts": [ { - "content": "Content: \n\n{Splitter:KindDingosJam@chunks}", + "content": "Content: \n\n{Splitter:CuteBusesBet@chunks}", "role": "user" } ], @@ -485,7 +485,7 @@ "outputs": {}, "presencePenaltyEnabled": false, "presence_penalty": 0.4, - "prompts": "Text Content:\n{Splitter:KindDingosJam@chunks}\n", + "prompts": "Text Content:\n{Splitter:NineTiesSin@chunks}\n", "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nExtract the most important keywords/phrases of a given piece of text content.\n\nRequirements\n- Summarize the text content, and give the top 5 important keywords/phrases.\n- The keywords MUST be in the same language as the given piece of text content.\n- The keywords are delimited by ENGLISH COMMA.\n- Output keywords ONLY.", "temperature": 0.1, "temperatureEnabled": false, @@ -522,7 +522,7 @@ "outputs": {}, "presencePenaltyEnabled": false, "presence_penalty": 0.4, - "prompts": "Text Content:\n\n{Splitter:KindDingosJam@chunks}\n", + "prompts": "Text Content:\n\n{Splitter:TastyPointsLay@chunks}\n", "sys_prompt": "Role\nYou are a text analyzer.\n\nTask\nPropose 3 questions about a given piece of text content.\n\nRequirements\n- Understand and summarize the text content, and propose the top 3 important questions.\n- The questions SHOULD NOT have overlapping meanings.\n- The questions SHOULD cover the main content of the text as much as possible.\n- The questions MUST be in the same language as the given piece of text content.\n- One question per line.\n- Output questions ONLY.", "temperature": 0.1, "temperatureEnabled": false, @@ -559,7 +559,7 @@ "outputs": {}, "presencePenaltyEnabled": false, "presence_penalty": 0.4, - "prompts": "Content: \n\n{Splitter:KindDingosJam@chunks}", + "prompts": "Content: \n\n{Splitter:BlueResultsWink@chunks}", "sys_prompt": "Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.\n\nImportant structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.", "temperature": 0.1, "temperatureEnabled": false, From dd046be976293bfc47c696497a9a2d3ff00aab42 Mon Sep 17 00:00:00 2001 From: buua436 Date: Tue, 9 Dec 2025 09:34:01 +0800 Subject: [PATCH 2/8] Fix: parent-child chunking method (#11810) ### What problem does this PR solve? change: parent-child chunking method ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- conf/infinity_mapping.json | 1 + rag/nlp/search.py | 6 ++++-- rag/svr/task_executor.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/conf/infinity_mapping.json b/conf/infinity_mapping.json index e68dd4f15..7a28d5754 100644 --- a/conf/infinity_mapping.json +++ b/conf/infinity_mapping.json @@ -2,6 +2,7 @@ "id": {"type": "varchar", "default": ""}, "doc_id": {"type": "varchar", "default": ""}, "kb_id": {"type": "varchar", "default": ""}, + "mom_id": {"type": "varchar", "default": ""}, "create_time": {"type": "varchar", "default": ""}, "create_timestamp_flt": {"type": "float", "default": 0.0}, "img_id": {"type": "varchar", "default": ""}, diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 1ca70f678..f5dd2d4de 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -91,7 +91,7 @@ class Dealer: ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd", "position_int", "doc_id", "page_num_int", "top_int", "create_timestamp_flt", "knowledge_graph_kwd", "question_kwd", "question_tks", "doc_type_kwd", - "available_int", "content_with_weight", PAGERANK_FLD, TAG_FLD]) + "available_int", "content_with_weight", "mom_id", PAGERANK_FLD, TAG_FLD]) kwds = set([]) qst = req.get("question", "") @@ -469,6 +469,7 @@ class Dealer: "vector": chunk.get(vector_column, zero_vector), "positions": position_int, "doc_type_kwd": chunk.get("doc_type_kwd", ""), + "mom_id": chunk.get("mom_id", ""), } if highlight and sres.highlight: if id in sres.highlight: @@ -650,7 +651,8 @@ class Dealer: i = 0 while i < len(chunks): ck = chunks[i] - if not ck.get("mom_id"): + mom_id = ck.get("mom_id") + if not isinstance(mom_id, str) or not mom_id.strip(): i += 1 continue mom_chunks[ck["mom_id"]].append(chunks.pop(i)) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index b08aa7524..62693f24f 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -727,17 +727,17 @@ async def insert_es(task_id, task_tenant_id, task_dataset_id, chunks, progress_c if not mom: continue id = xxhash.xxh64(mom.encode("utf-8")).hexdigest() + ck["mom_id"] = id if id in mother_ids: continue mother_ids.add(id) - ck["mom_id"] = id mom_ck = copy.deepcopy(ck) mom_ck["id"] = id mom_ck["content_with_weight"] = mom mom_ck["available_int"] = 0 flds = list(mom_ck.keys()) for fld in flds: - if fld not in ["id", "content_with_weight", "doc_id", "kb_id", "available_int", "position_int"]: + if fld not in ["id", "content_with_weight", "doc_id", "docnm_kwd", "kb_id", "available_int", "position_int"]: del mom_ck[fld] mothers.append(mom_ck) From f3a03b06b2538532304acc193d3b71b7c29da11a Mon Sep 17 00:00:00 2001 From: Levi <81591061+levischd@users.noreply.github.com> Date: Tue, 9 Dec 2025 02:35:03 +0100 Subject: [PATCH 3/8] fix: align http client proxy kwarg (#11818) ### What problem does this PR solve? Our HTTP wrapper still passed proxies to httpx.Client/AsyncClient, which expect proxy. As a result, configured proxies were ignored and calls could fail with ValueError("Failed to fetch OIDC metadata: Client.__init__() got an unexpected keyword argument 'proxies'"). This PR switches to the correct proxy kwarg so proxies are honored and the runtime error is resolved. ### Type of change - [X] Bug Fix (non-breaking change which fixes an issue) --- Contribution during my time at RAGcon GmbH. --- common/http_client.py | 68 ++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/common/http_client.py b/common/http_client.py index 2ffbb3bce..51f2e7fcc 100644 --- a/common/http_client.py +++ b/common/http_client.py @@ -24,7 +24,9 @@ logger = logging.getLogger(__name__) # Default knobs; keep conservative to avoid unexpected behavioural changes. DEFAULT_TIMEOUT = float(os.environ.get("HTTP_CLIENT_TIMEOUT", "15")) # Align with requests default: follow redirects with a max of 30 unless overridden. -DEFAULT_FOLLOW_REDIRECTS = bool(int(os.environ.get("HTTP_CLIENT_FOLLOW_REDIRECTS", "1"))) +DEFAULT_FOLLOW_REDIRECTS = bool( + int(os.environ.get("HTTP_CLIENT_FOLLOW_REDIRECTS", "1")) +) DEFAULT_MAX_REDIRECTS = int(os.environ.get("HTTP_CLIENT_MAX_REDIRECTS", "30")) DEFAULT_MAX_RETRIES = int(os.environ.get("HTTP_CLIENT_MAX_RETRIES", "2")) DEFAULT_BACKOFF_FACTOR = float(os.environ.get("HTTP_CLIENT_BACKOFF_FACTOR", "0.5")) @@ -32,7 +34,9 @@ DEFAULT_PROXY = os.environ.get("HTTP_CLIENT_PROXY") DEFAULT_USER_AGENT = os.environ.get("HTTP_CLIENT_USER_AGENT", "ragflow-http-client") -def _clean_headers(headers: Optional[Dict[str, str]], auth_token: Optional[str] = None) -> Optional[Dict[str, str]]: +def _clean_headers( + headers: Optional[Dict[str, str]], auth_token: Optional[str] = None +) -> Optional[Dict[str, str]]: merged_headers: Dict[str, str] = {} if DEFAULT_USER_AGENT: merged_headers["User-Agent"] = DEFAULT_USER_AGENT @@ -59,39 +63,51 @@ async def async_request( auth_token: Optional[str] = None, retries: Optional[int] = None, backoff_factor: Optional[float] = None, - proxies: Any = None, + proxy: Any = None, **kwargs: Any, ) -> httpx.Response: """Lightweight async HTTP wrapper using httpx.AsyncClient with safe defaults.""" timeout = timeout if timeout is not None else DEFAULT_TIMEOUT - follow_redirects = DEFAULT_FOLLOW_REDIRECTS if follow_redirects is None else follow_redirects + follow_redirects = ( + DEFAULT_FOLLOW_REDIRECTS if follow_redirects is None else follow_redirects + ) max_redirects = DEFAULT_MAX_REDIRECTS if max_redirects is None else max_redirects retries = DEFAULT_MAX_RETRIES if retries is None else max(retries, 0) - backoff_factor = DEFAULT_BACKOFF_FACTOR if backoff_factor is None else backoff_factor + backoff_factor = ( + DEFAULT_BACKOFF_FACTOR if backoff_factor is None else backoff_factor + ) headers = _clean_headers(headers, auth_token=auth_token) - proxies = DEFAULT_PROXY if proxies is None else proxies + proxy = DEFAULT_PROXY if proxy is None else proxy async with httpx.AsyncClient( timeout=timeout, follow_redirects=follow_redirects, max_redirects=max_redirects, - proxies=proxies, + proxy=proxy, ) as client: last_exc: Exception | None = None for attempt in range(retries + 1): try: start = time.monotonic() - response = await client.request(method=method, url=url, headers=headers, **kwargs) + response = await client.request( + method=method, url=url, headers=headers, **kwargs + ) duration = time.monotonic() - start - logger.debug(f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s") + logger.debug( + f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s" + ) return response except httpx.RequestError as exc: last_exc = exc if attempt >= retries: - logger.warning(f"async_request exhausted retries for {method} {url}: {exc}") + logger.warning( + f"async_request exhausted retries for {method} {url}: {exc}" + ) raise delay = _get_delay(backoff_factor, attempt) - logger.warning(f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s") + logger.warning( + f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s" + ) await asyncio.sleep(delay) raise last_exc # pragma: no cover @@ -107,39 +123,51 @@ def sync_request( auth_token: Optional[str] = None, retries: Optional[int] = None, backoff_factor: Optional[float] = None, - proxies: Any = None, + proxy: Any = None, **kwargs: Any, ) -> httpx.Response: """Synchronous counterpart to async_request, for CLI/tests or sync contexts.""" timeout = timeout if timeout is not None else DEFAULT_TIMEOUT - follow_redirects = DEFAULT_FOLLOW_REDIRECTS if follow_redirects is None else follow_redirects + follow_redirects = ( + DEFAULT_FOLLOW_REDIRECTS if follow_redirects is None else follow_redirects + ) max_redirects = DEFAULT_MAX_REDIRECTS if max_redirects is None else max_redirects retries = DEFAULT_MAX_RETRIES if retries is None else max(retries, 0) - backoff_factor = DEFAULT_BACKOFF_FACTOR if backoff_factor is None else backoff_factor + backoff_factor = ( + DEFAULT_BACKOFF_FACTOR if backoff_factor is None else backoff_factor + ) headers = _clean_headers(headers, auth_token=auth_token) - proxies = DEFAULT_PROXY if proxies is None else proxies + proxy = DEFAULT_PROXY if proxy is None else proxy with httpx.Client( timeout=timeout, follow_redirects=follow_redirects, max_redirects=max_redirects, - proxies=proxies, + proxy=proxy, ) as client: last_exc: Exception | None = None for attempt in range(retries + 1): try: start = time.monotonic() - response = client.request(method=method, url=url, headers=headers, **kwargs) + response = client.request( + method=method, url=url, headers=headers, **kwargs + ) duration = time.monotonic() - start - logger.debug(f"sync_request {method} {url} -> {response.status_code} in {duration:.3f}s") + logger.debug( + f"sync_request {method} {url} -> {response.status_code} in {duration:.3f}s" + ) return response except httpx.RequestError as exc: last_exc = exc if attempt >= retries: - logger.warning(f"sync_request exhausted retries for {method} {url}: {exc}") + logger.warning( + f"sync_request exhausted retries for {method} {url}: {exc}" + ) raise delay = _get_delay(backoff_factor, attempt) - logger.warning(f"sync_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s") + logger.warning( + f"sync_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s" + ) time.sleep(delay) raise last_exc # pragma: no cover From 1777620ea5fe3bed7653243dc538861f476b76ad Mon Sep 17 00:00:00 2001 From: sjIlll <52092755+SJpoor@users.noreply.github.com> Date: Tue, 9 Dec 2025 09:38:44 +0800 Subject: [PATCH 4/8] fix: set default embedding model for TEI profile in Docker deployment (#11824) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What's changed fix: unify embedding model fallback logic for both TEI and non-TEI Docker deployments > This fix targets **Docker / `docker-compose` deployments**, ensuring a valid default embedding model is always set—regardless of the compose profile used. ## Changes | Scenario | New Behavior | |--------|--------------| | **Non-`tei-` profile** (e.g., default deployment) | `EMBEDDING_MDL` is now correctly initialized from `EMBEDDING_CFG` (derived from `user_default_llm`), ensuring custom defaults like `bge-m3@Ollama` are properly applied to new tenants. | | **`tei-` profile** (`COMPOSE_PROFILES` contains `tei-`) | Still respects the `TEI_MODEL` environment variable. If unset, falls back to `EMBEDDING_CFG`. Only when both are empty does it use the built-in default (`BAAI/bge-small-en-v1.5`), preventing an empty embedding model. | ## Why This Change? - **In non-TEI mode**: The previous logic would reset `EMBEDDING_MDL` to an empty string, causing pre-configured defaults (e.g., `bge-m3@Ollama` in the Docker image) to be ignored—leading to tenant initialization failures or silent misconfigurations. - **In TEI mode**: Users need the ability to override the model via `TEI_MODEL`, but without a safe fallback, missing configuration could break the system. The new logic adopts a **“config-first, env-var-override”** strategy for robustness in containerized environments. ## Implementation - Updated the assignment logic for `EMBEDDING_MDL` in `rag/common/settings.py` to follow a unified fallback chain: EMBEDDING_CFG → TEI_MODEL (if tei- profile active) → built-in default ## Testing Verified in Docker deployments: 1. **`COMPOSE_PROFILES=`** (no TEI) → New tenants get `bge-m3@Ollama` as the default embedding model 2. **`COMPOSE_PROFILES=tei-gpu` with no `TEI_MODEL` set** → Falls back to `BAAI/bge-small-en-v1.5` 3. **`COMPOSE_PROFILES=tei-gpu` with `TEI_MODEL=my-model`** → New tenants use `my-model` as the embedding model Closes #8916 fix #11522 fix #11306 --- common/settings.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/common/settings.py b/common/settings.py index c8f7a6de4..45dcdb618 100644 --- a/common/settings.py +++ b/common/settings.py @@ -210,7 +210,10 @@ def init_settings(): IMAGE2TEXT_CFG = _resolve_per_model_config(image2text_entry, LLM_FACTORY, API_KEY, LLM_BASE_URL) CHAT_MDL = CHAT_CFG.get("model", "") or "" - EMBEDDING_MDL = os.getenv("TEI_MODEL", "BAAI/bge-small-en-v1.5") if "tei-" in os.getenv("COMPOSE_PROFILES", "") else "" + EMBEDDING_MDL = EMBEDDING_CFG.get("model", "") or "" + compose_profiles = os.getenv("COMPOSE_PROFILES", "") + if "tei-" in compose_profiles: + EMBEDDING_MDL = os.getenv("TEI_MODEL", EMBEDDING_MDL or "BAAI/bge-small-en-v1.5") RERANK_MDL = RERANK_CFG.get("model", "") or "" ASR_MDL = ASR_CFG.get("model", "") or "" IMAGE2TEXT_MDL = IMAGE2TEXT_CFG.get("model", "") or "" From 481192300dc23c872d5a68b490f1cdfe5bcd3cd7 Mon Sep 17 00:00:00 2001 From: Stephen Hu <812791840@qq.com> Date: Tue, 9 Dec 2025 09:58:34 +0800 Subject: [PATCH 5/8] Fix:[ERROR][Exception]: list index out of range (#11826) ### What problem does this PR solve? https://github.com/infiniflow/ragflow/issues/11821 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/prompts/generator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rag/prompts/generator.py b/rag/prompts/generator.py index e8ad77032..9fc30dc33 100644 --- a/rag/prompts/generator.py +++ b/rag/prompts/generator.py @@ -781,7 +781,10 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None): # Merge structure and content (by index) prune = len(toc_with_levels) > 512 - max_lvl = sorted([t.get("level", "0") for t in toc_with_levels if isinstance(t, dict)])[-1] + max_lvl = "0" + sorted_list = sorted([t.get("level", "0") for t in toc_with_levels if isinstance(t, dict)]) + if sorted_list: + max_lvl = sorted_list[-1] merged = [] for _ , (toc_item, src_item) in enumerate(zip(toc_with_levels, filtered)): if prune and toc_item.get("level", "0") >= max_lvl: From c51e6b2a58abb25628e329d579c88875f75e7469 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Tue, 9 Dec 2025 13:08:37 +0800 Subject: [PATCH 6/8] Refa: migrate CV model chat to Async (#11828) ### What problem does this PR solve? Migrate CV model chat to Async. #11750 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] Refactoring --- rag/app/picture.py | 3 +- rag/flow/parser/parser.py | 3 +- rag/llm/chat_model.py | 3 +- rag/llm/cv_model.py | 107 +++++++++++++++++++++----------------- 4 files changed, 66 insertions(+), 50 deletions(-) diff --git a/rag/app/picture.py b/rag/app/picture.py index 8e7aa4bce..bc93ab279 100644 --- a/rag/app/picture.py +++ b/rag/app/picture.py @@ -14,6 +14,7 @@ # limitations under the License. # +import asyncio import io import re @@ -50,7 +51,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs): } ) cv_mdl = LLMBundle(tenant_id, llm_type=LLMType.IMAGE2TEXT, lang=lang) - ans = cv_mdl.chat(system="", history=[], gen_conf={}, video_bytes=binary, filename=filename) + ans = asyncio.run(cv_mdl.async_chat(system="", history=[], gen_conf={}, video_bytes=binary, filename=filename)) callback(0.8, "CV LLM respond: %s ..." % ans[:32]) ans += "\n" + ans tokenize(doc, ans, eng) diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index 7747448ad..8b443bfb7 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import io import json import os @@ -634,7 +635,7 @@ class Parser(ProcessBase): self.set_output("output_format", conf["output_format"]) cv_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"]) - txt = cv_mdl.chat(system="", history=[], gen_conf={}, video_bytes=blob, filename=name) + txt = asyncio.run(cv_mdl.async_chat(system="", history=[], gen_conf={}, video_bytes=blob, filename=name)) self.set_output("text", txt) diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 9f5457224..f3f207eb2 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -28,7 +28,7 @@ import json_repair import litellm import openai from openai import AsyncOpenAI, OpenAI -from openai.lib.azure import AzureOpenAI +from openai.lib.azure import AzureOpenAI, AsyncAzureOpenAI from strenum import StrEnum from common.token_utils import num_tokens_from_string, total_token_count_from_response @@ -535,6 +535,7 @@ class AzureChat(Base): api_version = json.loads(key).get("api_version", "2024-02-01") super().__init__(key, model_name, base_url, **kwargs) self.client = AzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version) + self.async_client = AsyncAzureOpenAI(api_key=key, base_url=base_url, api_version=api_version) self.model_name = model_name @property diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py index cc2aff97c..707bfef9e 100644 --- a/rag/llm/cv_model.py +++ b/rag/llm/cv_model.py @@ -14,6 +14,7 @@ # limitations under the License. # +import asyncio import base64 import json import logging @@ -27,9 +28,8 @@ from pathlib import Path from urllib.parse import urljoin import requests -from openai import OpenAI -from openai.lib.azure import AzureOpenAI -from zhipuai import ZhipuAI +from openai import OpenAI, AsyncOpenAI +from openai.lib.azure import AzureOpenAI, AsyncAzureOpenAI from common.token_utils import num_tokens_from_string, total_token_count_from_response from rag.nlp import is_english @@ -76,9 +76,9 @@ class Base(ABC): pmpt.append({"type": "image_url", "image_url": {"url": img if isinstance(img, str) and img.startswith("data:") else f"data:image/png;base64,{img}"}}) return pmpt - def chat(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, **kwargs): try: - response = self.client.chat.completions.create( + response = await self.async_client.chat.completions.create( model=self.model_name, messages=self._form_history(system, history, images), extra_body=self.extra_body, @@ -87,17 +87,17 @@ class Base(ABC): except Exception as e: return "**ERROR**: " + str(e), 0 - def chat_streamly(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat_streamly(self, system, history, gen_conf, images=None, **kwargs): ans = "" tk_count = 0 try: - response = self.client.chat.completions.create( + response = await self.async_client.chat.completions.create( model=self.model_name, messages=self._form_history(system, history, images), stream=True, extra_body=self.extra_body, ) - for resp in response: + async for resp in response: if not resp.choices[0].delta.content: continue delta = resp.choices[0].delta.content @@ -191,6 +191,7 @@ class GptV4(Base): base_url = "https://api.openai.com/v1" self.api_key = key self.client = OpenAI(api_key=key, base_url=base_url) + self.async_client = AsyncOpenAI(api_key=key, base_url=base_url) self.model_name = model_name self.lang = lang super().__init__(**kwargs) @@ -221,6 +222,7 @@ class AzureGptV4(GptV4): api_key = json.loads(key).get("api_key", "") api_version = json.loads(key).get("api_version", "2024-02-01") self.client = AzureOpenAI(api_key=api_key, azure_endpoint=kwargs["base_url"], api_version=api_version) + self.async_client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=kwargs["base_url"], api_version=api_version) self.model_name = model_name self.lang = lang Base.__init__(self, **kwargs) @@ -243,7 +245,7 @@ class QWenCV(GptV4): base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs) - def chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs): if video_bytes: try: summary, summary_num_tokens = self._process_video(video_bytes, filename) @@ -313,7 +315,8 @@ class Zhipu4V(GptV4): _FACTORY_NAME = "ZHIPU-AI" def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs): - self.client = ZhipuAI(api_key=key) + self.client = OpenAI(api_key=key, base_url="https://open.bigmodel.cn/api/paas/v4/") + self.async_client = AsyncOpenAI(api_key=key, base_url="https://open.bigmodel.cn/api/paas/v4/") self.model_name = model_name self.lang = lang Base.__init__(self, **kwargs) @@ -342,20 +345,20 @@ class Zhipu4V(GptV4): ) return response.json() - def chat(self, system, history, gen_conf, images=None, stream=False, **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, **kwargs): if system and history and history[0].get("role") != "system": history.insert(0, {"role": "system", "content": system}) gen_conf = self._clean_conf(gen_conf) logging.info(json.dumps(history, ensure_ascii=False, indent=2)) - response = self.client.chat.completions.create(model=self.model_name, messages=self._form_history(system, history, images), stream=False, **gen_conf) + response = await self.async_client.chat.completions.create(model=self.model_name, messages=self._form_history(system, history, images), stream=False, **gen_conf) content = response.choices[0].message.content.strip() cleaned = re.sub(r"<\|(begin_of_box|end_of_box)\|>", "", content).strip() return cleaned, total_token_count_from_response(response) - def chat_streamly(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat_streamly(self, system, history, gen_conf, images=None, **kwargs): from rag.llm.chat_model import LENGTH_NOTIFICATION_CN, LENGTH_NOTIFICATION_EN from rag.nlp import is_chinese @@ -366,8 +369,8 @@ class Zhipu4V(GptV4): tk_count = 0 try: logging.info(json.dumps(history, ensure_ascii=False, indent=2)) - response = self.client.chat.completions.create(model=self.model_name, messages=self._form_history(system, history, images), stream=True, **gen_conf) - for resp in response: + response = await self.async_client.chat.completions.create(model=self.model_name, messages=self._form_history(system, history, images), stream=True, **gen_conf) + async for resp in response: if not resp.choices[0].delta.content: continue delta = resp.choices[0].delta.content @@ -412,6 +415,7 @@ class StepFunCV(GptV4): if not base_url: base_url = "https://api.stepfun.com/v1" self.client = OpenAI(api_key=key, base_url=base_url) + self.async_client = AsyncOpenAI(api_key=key, base_url=base_url) self.model_name = model_name self.lang = lang Base.__init__(self, **kwargs) @@ -425,6 +429,7 @@ class VolcEngineCV(GptV4): base_url = "https://ark.cn-beijing.volces.com/api/v3" ark_api_key = json.loads(key).get("ark_api_key", "") self.client = OpenAI(api_key=ark_api_key, base_url=base_url) + self.async_client = AsyncOpenAI(api_key=ark_api_key, base_url=base_url) self.model_name = json.loads(key).get("ep_id", "") + json.loads(key).get("endpoint_id", "") self.lang = lang Base.__init__(self, **kwargs) @@ -438,6 +443,7 @@ class LmStudioCV(GptV4): raise ValueError("Local llm url cannot be None") base_url = urljoin(base_url, "v1") self.client = OpenAI(api_key="lm-studio", base_url=base_url) + self.async_client = AsyncOpenAI(api_key="lm-studio", base_url=base_url) self.model_name = model_name self.lang = lang Base.__init__(self, **kwargs) @@ -451,6 +457,7 @@ class OpenAI_APICV(GptV4): raise ValueError("url cannot be None") base_url = urljoin(base_url, "v1") self.client = OpenAI(api_key=key, base_url=base_url) + self.async_client = AsyncOpenAI(api_key=key, base_url=base_url) self.model_name = model_name.split("___")[0] self.lang = lang Base.__init__(self, **kwargs) @@ -491,6 +498,7 @@ class OpenRouterCV(GptV4): base_url = "https://openrouter.ai/api/v1" api_key = json.loads(key).get("api_key", "") self.client = OpenAI(api_key=api_key, base_url=base_url) + self.async_client = AsyncOpenAI(api_key=api_key, base_url=base_url) self.model_name = model_name self.lang = lang Base.__init__(self, **kwargs) @@ -522,6 +530,7 @@ class LocalAICV(GptV4): raise ValueError("Local cv model url cannot be None") base_url = urljoin(base_url, "v1") self.client = OpenAI(api_key="empty", base_url=base_url) + self.async_client = AsyncOpenAI(api_key="empty", base_url=base_url) self.model_name = model_name.split("___")[0] self.lang = lang Base.__init__(self, **kwargs) @@ -533,6 +542,7 @@ class XinferenceCV(GptV4): def __init__(self, key, model_name="", lang="Chinese", base_url="", **kwargs): base_url = urljoin(base_url, "v1") self.client = OpenAI(api_key=key, base_url=base_url) + self.async_client = AsyncOpenAI(api_key=key, base_url=base_url) self.model_name = model_name self.lang = lang Base.__init__(self, **kwargs) @@ -546,6 +556,7 @@ class GPUStackCV(GptV4): raise ValueError("Local llm url cannot be None") base_url = urljoin(base_url, "v1") self.client = OpenAI(api_key=key, base_url=base_url) + self.async_client = AsyncOpenAI(api_key=key, base_url=base_url) self.model_name = model_name self.lang = lang Base.__init__(self, **kwargs) @@ -635,19 +646,19 @@ class OllamaCV(Base): except Exception as e: return "**ERROR**: " + str(e), 0 - def chat(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, **kwargs): try: - response = self.client.chat(model=self.model_name, messages=self._form_history(system, history, images), options=self._clean_conf(gen_conf), keep_alive=self.keep_alive) + response = await asyncio.to_thread(self.client.chat, model=self.model_name, messages=self._form_history(system, history, images), options=self._clean_conf(gen_conf), keep_alive=self.keep_alive) ans = response["message"]["content"].strip() return ans, response["eval_count"] + response.get("prompt_eval_count", 0) except Exception as e: return "**ERROR**: " + str(e), 0 - def chat_streamly(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat_streamly(self, system, history, gen_conf, images=None, **kwargs): ans = "" try: - response = self.client.chat(model=self.model_name, messages=self._form_history(system, history, images), stream=True, options=self._clean_conf(gen_conf), keep_alive=self.keep_alive) + response = await asyncio.to_thread(self.client.chat, model=self.model_name, messages=self._form_history(system, history, images), stream=True, options=self._clean_conf(gen_conf), keep_alive=self.keep_alive) for resp in response: if resp["done"]: yield resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0) @@ -780,41 +791,41 @@ class GeminiCV(Base): ) return res.text, total_token_count_from_response(res) - def chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs): if video_bytes: try: size = len(video_bytes) if video_bytes else 0 - logging.info(f"[GeminiCV] chat called with video: filename={filename} size={size}") - summary, summary_num_tokens = self._process_video(video_bytes, filename) + logging.info(f"[GeminiCV] async_chat called with video: filename={filename} size={size}") + summary, summary_num_tokens = await asyncio.to_thread(self._process_video, video_bytes, filename) return summary, summary_num_tokens except Exception as e: - logging.info(f"[GeminiCV] chat video error: {e}") + logging.info(f"[GeminiCV] async_chat video error: {e}") return "**ERROR**: " + str(e), 0 from google.genai import types history_len = len(history) if history else 0 images_len = len(images) if images else 0 - logging.info(f"[GeminiCV] chat called: history_len={history_len} images_len={images_len} gen_conf={gen_conf}") + logging.info(f"[GeminiCV] async_chat called: history_len={history_len} images_len={images_len} gen_conf={gen_conf}") generation_config = types.GenerateContentConfig( temperature=gen_conf.get("temperature", 0.3), top_p=gen_conf.get("top_p", 0.7), ) try: - response = self.client.models.generate_content( + response = await self.client.aio.models.generate_content( model=self.model_name, contents=self._form_history(system, history, images), config=generation_config, ) ans = response.text - logging.info("[GeminiCV] chat completed") + logging.info("[GeminiCV] async_chat completed") return ans, total_token_count_from_response(response) except Exception as e: - logging.warning(f"[GeminiCV] chat error: {e}") + logging.warning(f"[GeminiCV] async_chat error: {e}") return "**ERROR**: " + str(e), 0 - def chat_streamly(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat_streamly(self, system, history, gen_conf, images=None, **kwargs): ans = "" response = None try: @@ -826,15 +837,15 @@ class GeminiCV(Base): ) history_len = len(history) if history else 0 images_len = len(images) if images else 0 - logging.info(f"[GeminiCV] chat_streamly called: history_len={history_len} images_len={images_len} gen_conf={gen_conf}") + logging.info(f"[GeminiCV] async_chat_streamly called: history_len={history_len} images_len={images_len} gen_conf={gen_conf}") - response_stream = self.client.models.generate_content_stream( + response_stream = await self.client.aio.models.generate_content_stream( model=self.model_name, contents=self._form_history(system, history, images), config=generation_config, ) - for chunk in response_stream: + async for chunk in response_stream: if chunk.text: ans += chunk.text yield chunk.text @@ -939,17 +950,17 @@ class NvidiaCV(Base): response = self._request(vision_prompt) return (response["choices"][0]["message"]["content"].strip(), total_token_count_from_response(response)) - def chat(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, **kwargs): try: - response = self._request(self._form_history(system, history, images), gen_conf) + response = await asyncio.to_thread(self._request, self._form_history(system, history, images), gen_conf) return (response["choices"][0]["message"]["content"].strip(), total_token_count_from_response(response)) except Exception as e: return "**ERROR**: " + str(e), 0 - def chat_streamly(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat_streamly(self, system, history, gen_conf, images=None, **kwargs): total_tokens = 0 try: - response = self._request(self._form_history(system, history, images), gen_conf) + response = await asyncio.to_thread(self._request, self._form_history(system, history, images), gen_conf) cnt = response["choices"][0]["message"]["content"] total_tokens += total_token_count_from_response(response) for resp in cnt: @@ -967,6 +978,7 @@ class AnthropicCV(Base): import anthropic self.client = anthropic.Anthropic(api_key=key) + self.async_client = anthropic.AsyncAnthropic(api_key=key) self.model_name = model_name self.system = "" self.max_tokens = 8192 @@ -1012,17 +1024,18 @@ class AnthropicCV(Base): gen_conf["max_tokens"] = self.max_tokens return gen_conf - def chat(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, **kwargs): gen_conf = self._clean_conf(gen_conf) ans = "" try: - response = self.client.messages.create( + response = await self.async_client.messages.create( model=self.model_name, messages=self._form_history(system, history, images), system=system, stream=False, **gen_conf, - ).to_dict() + ) + response = response.to_dict() ans = response["content"][0]["text"] if response["stop_reason"] == "max_tokens": ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?" @@ -1033,11 +1046,11 @@ class AnthropicCV(Base): except Exception as e: return ans + "\n**ERROR**: " + str(e), 0 - def chat_streamly(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat_streamly(self, system, history, gen_conf, images=None, **kwargs): gen_conf = self._clean_conf(gen_conf) total_tokens = 0 try: - response = self.client.messages.create( + response = self.async_client.messages.create( model=self.model_name, messages=self._form_history(system, history, images), system=system, @@ -1045,7 +1058,7 @@ class AnthropicCV(Base): **gen_conf, ) think = False - for res in response: + async for res in response: if res.type == "content_block_delta": if res.delta.type == "thinking_delta" and res.delta.thinking: if not think: @@ -1117,18 +1130,18 @@ class GoogleCV(AnthropicCV, GeminiCV): else: return GeminiCV.describe_with_prompt(self, image, prompt) - def chat(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat(self, system, history, gen_conf, images=None, **kwargs): if "claude" in self.model_name: - return AnthropicCV.chat(self, system, history, gen_conf, images) + return await AnthropicCV.async_chat(self, system, history, gen_conf, images) else: - return GeminiCV.chat(self, system, history, gen_conf, images) + return await GeminiCV.async_chat(self, system, history, gen_conf, images) - def chat_streamly(self, system, history, gen_conf, images=None, **kwargs): + async def async_chat_streamly(self, system, history, gen_conf, images=None, **kwargs): if "claude" in self.model_name: - for ans in AnthropicCV.chat_streamly(self, system, history, gen_conf, images): + async for ans in AnthropicCV.async_chat_streamly(self, system, history, gen_conf, images): yield ans else: - for ans in GeminiCV.chat_streamly(self, system, history, gen_conf, images): + async for ans in GeminiCV.async_chat_streamly(self, system, history, gen_conf, images): yield ans From 28bc87c5e24ca0660fa5344c32a871e96862db0a Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Tue, 9 Dec 2025 14:52:58 +0800 Subject: [PATCH 7/8] Feature: Memory interface integration testing (#11833) ### What problem does this PR solve? Feature: Memory interface integration testing ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- web/src/components/confirm-delete-dialog.tsx | 29 +- web/src/components/dynamic-form.tsx | 534 +++++++++--------- web/src/components/ragflow-form.tsx | 7 +- web/src/layouts/next-header.tsx | 1 + web/src/locales/en.ts | 31 +- .../configuration/common-item.tsx | 13 + .../dataset-setting/configuration/naive.tsx | 3 +- .../dataset/dataset-setting/form-schema.ts | 1 + .../pages/dataset/dataset-setting/index.tsx | 1 + .../dataset-setting/permission-form-field.tsx | 24 +- web/src/pages/memories/add-or-edit-modal.tsx | 39 +- web/src/pages/memories/constants/index.tsx | 22 +- web/src/pages/memories/hooks.ts | 20 +- web/src/pages/memories/index.tsx | 8 +- web/src/pages/memories/interface.ts | 20 +- web/src/pages/memories/memory-card.tsx | 2 +- web/src/pages/memories/memory-dropdown.tsx | 22 +- web/src/pages/memory/constant.tsx | 2 + .../pages/memory/hooks/use-memory-messages.ts | 59 -- .../pages/memory/hooks/use-memory-setting.ts | 26 +- web/src/pages/memory/memory-message/hook.ts | 128 +++++ web/src/pages/memory/memory-message/index.tsx | 2 +- .../pages/memory/memory-message/interface.ts | 5 + .../memory/memory-message/message-table.tsx | 167 +++++- .../memory-setting/advanced-settings-form.tsx | 159 ++++++ .../memory/memory-setting/basic-form.tsx | 53 ++ web/src/pages/memory/memory-setting/hook.ts | 42 ++ web/src/pages/memory/memory-setting/index.tsx | 113 +++- .../memory-setting/memory-model-form.tsx | 74 +++ web/src/pages/memory/sidebar/index.tsx | 21 +- web/src/services/memory-service.ts | 29 +- web/src/utils/api.ts | 12 +- 32 files changed, 1168 insertions(+), 501 deletions(-) delete mode 100644 web/src/pages/memory/hooks/use-memory-messages.ts create mode 100644 web/src/pages/memory/memory-message/hook.ts create mode 100644 web/src/pages/memory/memory-setting/advanced-settings-form.tsx create mode 100644 web/src/pages/memory/memory-setting/basic-form.tsx create mode 100644 web/src/pages/memory/memory-setting/hook.ts create mode 100644 web/src/pages/memory/memory-setting/memory-model-form.tsx diff --git a/web/src/components/confirm-delete-dialog.tsx b/web/src/components/confirm-delete-dialog.tsx index cc85e8153..22ac73fcb 100644 --- a/web/src/components/confirm-delete-dialog.tsx +++ b/web/src/components/confirm-delete-dialog.tsx @@ -45,7 +45,7 @@ export function ConfirmDeleteDialog({ const { t } = useTranslation(); if (hidden) { - return children; + return children || <>; } return ( @@ -54,7 +54,7 @@ export function ConfirmDeleteDialog({ open={open} defaultOpen={defaultOpen} > - {children} + {children && {children}} { e.stopPropagation(); @@ -109,23 +109,28 @@ export function ConfirmDeleteDialog({ export const ConfirmDeleteDialogNode = ({ avatar, name, + warnText, children, }: { avatar?: { avatar?: string; name?: string; isPerson?: boolean }; name?: string; + warnText?: string; children?: React.ReactNode; }) => { return ( -
- {avatar && ( - - )} - {name &&
{name}
} +
+
+ {avatar && ( + + )} + {name &&
{name}
} +
+ {warnText &&
{warnText}
} {children}
); diff --git a/web/src/components/dynamic-form.tsx b/web/src/components/dynamic-form.tsx index 2fdc55af0..2926f32b6 100644 --- a/web/src/components/dynamic-form.tsx +++ b/web/src/components/dynamic-form.tsx @@ -110,7 +110,7 @@ export interface DynamicFormRef { } // Generate Zod validation schema based on field configurations -const generateSchema = (fields: FormFieldConfig[]): ZodSchema => { +export const generateSchema = (fields: FormFieldConfig[]): ZodSchema => { const schema: Record = {}; const nestedSchemas: Record> = {}; @@ -311,6 +311,271 @@ const generateDefaultValues = ( return defaultValues as DefaultValues; }; +// Render form fields +export const RenderField = ({ + field, + labelClassName, +}: { + field: FormFieldConfig; + labelClassName?: string; +}) => { + const form = useFormContext(); + if (field.render) { + if (field.type === FormFieldType.Custom && field.hideLabel) { + return
{field.render({})}
; + } + return ( + + {(fieldProps) => { + const finalFieldProps = field.onChange + ? { + ...fieldProps, + onChange: (e: any) => { + fieldProps.onChange(e); + field.onChange?.(e.target?.value ?? e); + }, + } + : fieldProps; + return field.render?.(finalFieldProps); + }} + + ); + } + switch (field.type) { + case FormFieldType.Textarea: + return ( + + {(fieldProps) => { + const finalFieldProps = field.onChange + ? { + ...fieldProps, + onChange: (e: any) => { + fieldProps.onChange(e); + field.onChange?.(e.target.value); + }, + } + : fieldProps; + return ( +