From c8ab9079b3debc7595d456de809755ba5fcf8515 Mon Sep 17 00:00:00 2001 From: buua436 <66937541+buua436@users.noreply.github.com> Date: Thu, 20 Nov 2025 19:00:38 +0800 Subject: [PATCH 1/6] Fix:improve multi-column document detection (#11415) ### What problem does this PR solve? change: improve multi-column document detection ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- deepdoc/parser/pdf_parser.py | 99 ++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 5bc877a6a..6d8431c82 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -33,6 +33,8 @@ import xgboost as xgb from huggingface_hub import snapshot_download from PIL import Image from pypdf import PdfReader as pdf2_read +from sklearn.cluster import KMeans +from sklearn.metrics import silhouette_score from common.file_utils import get_project_base_directory from common.misc_utils import pip_install_torch @@ -353,7 +355,6 @@ class RAGFlowPdfParser: def _assign_column(self, boxes, zoomin=3): if not boxes: return boxes - if all("col_id" in b for b in boxes): return boxes @@ -361,61 +362,80 @@ class RAGFlowPdfParser: for b in boxes: by_page[b["page_number"]].append(b) - page_info = {} # pg -> dict(page_w, left_edge, cand_cols) - counter = Counter() + page_cols = {} for pg, bxs in by_page.items(): if not bxs: - page_info[pg] = {"page_w": 1.0, "left_edge": 0.0, "cand": 1} - counter[1] += 1 + page_cols[pg] = 1 continue - if hasattr(self, "page_images") and self.page_images and len(self.page_images) >= pg: - page_w = self.page_images[pg - 1].size[0] / max(1, zoomin) - left_edge = 0.0 - else: - xs0 = [box["x0"] for box in bxs] - xs1 = [box["x1"] for box in bxs] - left_edge = float(min(xs0)) - page_w = max(1.0, float(max(xs1) - left_edge)) + x0s_raw = np.array([b["x0"] for b in bxs], dtype=float) - widths = [max(1.0, (box["x1"] - box["x0"])) for box in 
bxs] - median_w = float(np.median(widths)) if widths else 1.0 + min_x0 = np.min(x0s_raw) + max_x1 = np.max([b["x1"] for b in bxs]) + width = max_x1 - min_x0 - raw_cols = int(page_w / max(1.0, median_w)) + INDENT_TOL = width * 0.12 + x0s = [] + for x in x0s_raw: + if abs(x - min_x0) < INDENT_TOL: + x0s.append([min_x0]) + else: + x0s.append([x]) + x0s = np.array(x0s, dtype=float) + + max_try = min(4, len(bxs)) + if max_try < 2: + max_try = 1 + best_k = 1 + best_score = -1 - # cand = raw_cols if (raw_cols >= 2 and median_w < page_w / raw_cols * 0.8) else 1 - cand = raw_cols + for k in range(1, max_try + 1): + km = KMeans(n_clusters=k, n_init="auto") + labels = km.fit_predict(x0s) - page_info[pg] = {"page_w": page_w, "left_edge": left_edge, "cand": cand} - counter[cand] += 1 + centers = np.sort(km.cluster_centers_.flatten()) + if len(centers) > 1: + try: + score = silhouette_score(x0s, labels) + except ValueError: + continue + else: + score = 0 + print(f"{k=},{score=}",flush=True) + if score > best_score: + best_score = score + best_k = k - logging.info(f"[Page {pg}] median_w={median_w:.2f}, page_w={page_w:.2f}, raw_cols={raw_cols}, cand={cand}") + page_cols[pg] = best_k + logging.info(f"[Page {pg}] best_score={best_score:.2f}, best_k={best_k}") - global_cols = counter.most_common(1)[0][0] + + global_cols = Counter(page_cols.values()).most_common(1)[0][0] logging.info(f"Global column_num decided by majority: {global_cols}") + for pg, bxs in by_page.items(): if not bxs: continue + k = page_cols[pg] + if len(bxs) < k: + k = 1 + x0s = np.array([[b["x0"]] for b in bxs], dtype=float) + km = KMeans(n_clusters=k, n_init="auto") + labels = km.fit_predict(x0s) - page_w = page_info[pg]["page_w"] - left_edge = page_info[pg]["left_edge"] + centers = km.cluster_centers_.flatten() + order = np.argsort(centers) - if global_cols == 1: - for box in bxs: - box["col_id"] = 0 - continue + remap = {orig: new for new, orig in enumerate(order)} - for box in bxs: - w = box["x1"] - box["x0"] - 
if w >= 0.8 * page_w: - box["col_id"] = 0 - continue - cx = 0.5 * (box["x0"] + box["x1"]) - norm_cx = (cx - left_edge) / page_w - norm_cx = max(0.0, min(norm_cx, 0.999999)) - box["col_id"] = int(min(global_cols - 1, norm_cx * global_cols)) + for b, lb in zip(bxs, labels): + b["col_id"] = remap[lb] + + grouped = defaultdict(list) + for b in bxs: + grouped[b["col_id"]].append(b) return boxes @@ -1303,7 +1323,10 @@ class RAGFlowPdfParser: positions = [] for ii, (pns, left, right, top, bottom) in enumerate(poss): - right = left + max_width + if 0 < ii < len(poss) - 1: + right = max(left + 10, right) + else: + right = left + max_width bottom *= ZM for pn in pns[1:]: if 0 <= pn - 1 < page_count: From d3d2ccc76c1078dd401707eb6b3bf6db51200d1d Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Thu, 20 Nov 2025 19:07:17 +0800 Subject: [PATCH 2/6] Feat: add more chunking method (#11413) ### What problem does this PR solve? Feat: add more chunking method #11311 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- deepdoc/parser/docling_parser.py | 14 +++++++---- deepdoc/parser/mineru_parser.py | 12 +++++++--- docker/.env | 9 ++++++- rag/app/manual.py | 5 ++-- rag/app/naive.py | 4 ++++ rag/app/paper.py | 41 +++++++++++++++++++++++++------- 6 files changed, 66 insertions(+), 19 deletions(-) diff --git a/deepdoc/parser/docling_parser.py b/deepdoc/parser/docling_parser.py index 59fec9250..965f82265 100644 --- a/deepdoc/parser/docling_parser.py +++ b/deepdoc/parser/docling_parser.py @@ -187,7 +187,7 @@ class DoclingParser(RAGFlowPdfParser): bbox = _BBox(int(pn), bb[0], bb[1], bb[2], bb[3]) yield (DoclingContentType.EQUATION.value, text, bbox) - def _transfer_to_sections(self, doc) -> list[tuple[str, str]]: + def _transfer_to_sections(self, doc, parse_method: str) -> list[tuple[str, str]]: sections: list[tuple[str, str]] = [] for typ, payload, bbox in self._iter_doc_items(doc): if typ == DoclingContentType.TEXT.value: @@ -200,7 +200,12 @@ class 
DoclingParser(RAGFlowPdfParser): continue tag = self._make_line_tag(bbox) if isinstance(bbox,_BBox) else "" - sections.append((section, tag)) + if parse_method == "manual": + sections.append((section, typ, tag)) + elif parse_method == "paper": + sections.append((section + tag, typ)) + else: + sections.append((section, tag)) return sections def cropout_docling_table(self, page_no: int, bbox: tuple[float, float, float, float], zoomin: int = 1): @@ -282,7 +287,8 @@ class DoclingParser(RAGFlowPdfParser): output_dir: Optional[str] = None, lang: Optional[str] = None, method: str = "auto", - delete_output: bool = True, + delete_output: bool = True, + parse_method: str = "raw" ): if not self.check_installation(): @@ -318,7 +324,7 @@ class DoclingParser(RAGFlowPdfParser): if callback: callback(0.7, f"[Docling] Parsed doc: {getattr(doc, 'num_pages', 'n/a')} pages") - sections = self._transfer_to_sections(doc) + sections = self._transfer_to_sections(doc, parse_method=parse_method) tables = self._transfer_to_tables(doc) if callback: diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py index d2b694188..d4834de39 100644 --- a/deepdoc/parser/mineru_parser.py +++ b/deepdoc/parser/mineru_parser.py @@ -476,7 +476,7 @@ class MinerUParser(RAGFlowPdfParser): item[key] = str((subdir / item[key]).resolve()) return data - def _transfer_to_sections(self, outputs: list[dict[str, Any]]): + def _transfer_to_sections(self, outputs: list[dict[str, Any]], parse_method: str = None): sections = [] for output in outputs: match output["type"]: @@ -497,7 +497,11 @@ class MinerUParser(RAGFlowPdfParser): case MinerUContentType.DISCARDED: pass - if section: + if section and parse_method == "manual": + sections.append((section, output["type"], self._line_tag(output))) + elif section and parse_method == "paper": + sections.append((section + self._line_tag(output), output["type"])) + else: sections.append((section, self._line_tag(output))) return sections @@ -516,6 +520,7 @@ class 
MinerUParser(RAGFlowPdfParser): method: str = "auto", server_url: Optional[str] = None, delete_output: bool = True, + parse_method: str = "raw" ) -> tuple: import shutil @@ -565,7 +570,8 @@ class MinerUParser(RAGFlowPdfParser): self.logger.info(f"[MinerU] Parsed {len(outputs)} blocks from PDF.") if callback: callback(0.75, f"[MinerU] Parsed {len(outputs)} blocks from PDF.") - return self._transfer_to_sections(outputs), self._transfer_to_tables(outputs) + + return self._transfer_to_sections(outputs, parse_method), self._transfer_to_tables(outputs) finally: if temp_pdf and temp_pdf.exists(): try: diff --git a/docker/.env b/docker/.env index d7e4b025f..6423b7824 100644 --- a/docker/.env +++ b/docker/.env @@ -230,9 +230,16 @@ REGISTER_ENABLED=1 # SANDBOX_MAX_MEMORY=256m # b, k, m, g # SANDBOX_TIMEOUT=10s # s, m, 1m30s -# Enable DocLing and Mineru +# Enable DocLing USE_DOCLING=false + +# Enable Mineru USE_MINERU=false +MINERU_EXECUTABLE="$HOME/uv_tools/.venv/bin/mineru" +MINERU_DELETE_OUTPUT=0 # keep output directory +MINERU_BACKEND=pipeline # or another backend you prefer + + # pptx support DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 \ No newline at end of file diff --git a/rag/app/manual.py b/rag/app/manual.py index 5808e2498..124864041 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -213,6 +213,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + parse_method = "manual", **kwargs ) @@ -225,7 +226,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, elif len(section) != 3: raise ValueError(f"Unexpected section length: {len(section)} (value={section!r})") - txt, sec_id, poss = section + txt, layoutno, poss = section if isinstance(poss, str): poss = pdf_parser.extract_positions(poss) first = poss[0] # tuple: ([pn], x1, x2, y1, y2) @@ -235,7 +236,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, pn = pn[0] # [pn] -> pn poss[0] = (pn, *first[1:]) - return (txt, sec_id, 
poss) + return (txt, layoutno, poss) sections = [_normalize_section(sec) for sec in sections] diff --git a/rag/app/naive.py b/rag/app/naive.py index 49dca17af..562336d7f 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -59,6 +59,7 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese" mineru_executable = os.environ.get("MINERU_EXECUTABLE", "mineru") mineru_api = os.environ.get("MINERU_APISERVER", "http://host.docker.internal:9987") pdf_parser = MinerUParser(mineru_path=mineru_executable, mineru_api=mineru_api) + parse_method = kwargs.get("parse_method", "raw") if not pdf_parser.check_installation(): callback(-1, "MinerU not found.") @@ -72,12 +73,14 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese" backend=os.environ.get("MINERU_BACKEND", "pipeline"), server_url=os.environ.get("MINERU_SERVER_URL", ""), delete_output=bool(int(os.environ.get("MINERU_DELETE_OUTPUT", 1))), + parse_method=parse_method ) return sections, tables, pdf_parser def by_docling(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls = None ,**kwargs): pdf_parser = DoclingParser() + parse_method = kwargs.get("parse_method", "raw") if not pdf_parser.check_installation(): callback(-1, "Docling not found.") @@ -89,6 +92,7 @@ def by_docling(filename, binary=None, from_page=0, to_page=100000, lang="Chinese callback=callback, output_dir=os.environ.get("MINERU_OUTPUT_DIR", ""), delete_output=bool(int(os.environ.get("MINERU_DELETE_OUTPUT", 1))), + parse_method=parse_method ) return sections, tables, pdf_parser diff --git a/rag/app/paper.py b/rag/app/paper.py index d95976c9f..222be0762 100644 --- a/rag/app/paper.py +++ b/rag/app/paper.py @@ -21,8 +21,10 @@ import re from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper from common.constants import ParserType from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, 
tokenize_chunks -from deepdoc.parser import PdfParser, PlainParser +from deepdoc.parser import PdfParser import numpy as np +from rag.app.naive import by_plaintext, PARSERS + class Pdf(PdfParser): def __init__(self): @@ -147,19 +149,40 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, "parser_config", { "chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": "DeepDOC"}) if re.search(r"\.pdf$", filename, re.IGNORECASE): - if parser_config.get("layout_recognize", "DeepDOC") == "Plain Text": - pdf_parser = PlainParser() + layout_recognizer = parser_config.get("layout_recognize", "DeepDOC") + + if isinstance(layout_recognizer, bool): + layout_recognizer = "DeepDOC" if layout_recognizer else "Plain Text" + + name = layout_recognizer.strip().lower() + pdf_parser = PARSERS.get(name, by_plaintext) + callback(0.1, "Start to parse.") + + if name == "deepdoc": + pdf_parser = Pdf() + paper = pdf_parser(filename if not binary else binary, + from_page=from_page, to_page=to_page, callback=callback) + else: + sections, tables, pdf_parser = pdf_parser( + filename=filename, + binary=binary, + from_page=from_page, + to_page=to_page, + lang=lang, + callback=callback, + pdf_cls=Pdf, + parse_method="paper", + **kwargs + ) + paper = { "title": filename, "authors": " ", "abstract": "", - "sections": pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page)[0], - "tables": [] + "sections": sections, + "tables": tables } - else: - pdf_parser = Pdf() - paper = pdf_parser(filename if not binary else binary, - from_page=from_page, to_page=to_page, callback=callback) + tbls=paper["tables"] tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs) paper["tables"] = tbls From 820934fc779609247a03a6eaa6886149b4fcd691 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Thu, 20 Nov 2025 19:51:25 +0800 Subject: [PATCH 3/6] Fix: no result if metadata returns none. 
(#11412) ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- agent/tools/retrieval.py | 6 +++--- api/apps/chunk_app.py | 4 ++-- api/apps/sdk/doc.py | 2 ++ api/apps/sdk/session.py | 12 ++++++------ api/db/services/dialog_service.py | 15 ++++++++------- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/agent/tools/retrieval.py b/agent/tools/retrieval.py index c3a01e517..fd9096cb6 100644 --- a/agent/tools/retrieval.py +++ b/agent/tools/retrieval.py @@ -137,7 +137,7 @@ class Retrieval(ToolBase, ABC): if not doc_ids: doc_ids = None elif self._param.meta_data_filter.get("method") == "manual": - filters=self._param.meta_data_filter["manual"] + filters = self._param.meta_data_filter["manual"] for flt in filters: pat = re.compile(self.variable_ref_patt) s = flt["value"] @@ -166,8 +166,8 @@ class Retrieval(ToolBase, ABC): out_parts.append(s[last:]) flt["value"] = "".join(out_parts) doc_ids.extend(meta_filter(metas, filters, self._param.meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if filters and not doc_ids: + doc_ids = ["-999"] if self._param.cross_languages: query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages) diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py index e121bcba7..b43fb9af1 100644 --- a/api/apps/chunk_app.py +++ b/api/apps/chunk_app.py @@ -311,8 +311,8 @@ async def retrieval_test(): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] try: tenants = UserTenantService.query(user_id=current_user.id) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 84300ac3c..30fbd835e 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1445,6 +1445,8 @@ async def retrieval_test(tenant_id): metadata_condition = 
req.get("metadata_condition", {}) or {} metas = DocumentService.get_meta_by_kbs(kb_ids) doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")) + if metadata_condition and not doc_ids: + doc_ids = ["-999"] similarity_threshold = float(req.get("similarity_threshold", 0.2)) vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3)) top = int(req.get("top_k", 1024)) diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index 533375622..074401ede 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -446,8 +446,8 @@ async def agent_completions(tenant_id, agent_id): if req.get("stream", True): - def generate(): - for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): + async def generate(): + async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): if isinstance(answer, str): try: ans = json.loads(answer[5:]) # remove "data:" @@ -471,7 +471,7 @@ async def agent_completions(tenant_id, agent_id): full_content = "" reference = {} final_ans = "" - for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): + async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): try: ans = json.loads(answer[5:]) @@ -873,7 +873,7 @@ async def agent_bot_completions(agent_id): resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") return resp - for answer in agent_completion(objs[0].tenant_id, agent_id, **req): + async for answer in agent_completion(objs[0].tenant_id, agent_id, **req): return get_result(data=answer) @@ -981,8 +981,8 @@ async def retrieval_test_embedded(): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] try: tenants = 
UserTenantService.query(user_id=tenant_id) diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index db878574d..0a09ea532 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -415,9 +415,10 @@ def chat(dialog, messages, stream=True, **kwargs): if not attachments: attachments = None elif dialog.meta_data_filter.get("method") == "manual": - attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"], dialog.meta_data_filter.get("logic", "and"))) - if not attachments: - attachments = None + conds = dialog.meta_data_filter["manual"] + attachments.extend(meta_filter(metas, conds, dialog.meta_data_filter.get("logic", "and"))) + if conds and not attachments: + attachments = ["-999"] if prompt_config.get("keyword", False): questions[-1] += keyword_extraction(chat_mdl, questions[-1]) @@ -787,8 +788,8 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] kbinfos = retriever.retrieval( question=question, @@ -862,8 +863,8 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] ranks = settings.retriever.retrieval( question=question, From 065917bf1c22300a0ddaffc04a4bdaf608e10a32 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Thu, 20 Nov 2025 19:51:37 +0800 Subject: [PATCH 4/6] Feat: enriches Notion connector (#11414) ### What problem does this PR solve? 
Enriches rich text (links, mentions, equations), flags to-do blocks with [x]/[ ], captures block-level equations, builds table HTML, downloads attachments. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- common/data_source/notion_connector.py | 356 ++++++++++++++++++------- 1 file changed, 264 insertions(+), 92 deletions(-) diff --git a/common/data_source/notion_connector.py b/common/data_source/notion_connector.py index 8c6a522ad..e29bbbe76 100644 --- a/common/data_source/notion_connector.py +++ b/common/data_source/notion_connector.py @@ -1,38 +1,45 @@ +import html import logging from collections.abc import Generator +from datetime import datetime, timezone +from pathlib import Path from typing import Any, Optional +from urllib.parse import urlparse + from retry import retry from common.data_source.config import ( INDEX_BATCH_SIZE, - DocumentSource, NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP + NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP, + DocumentSource, +) +from common.data_source.exceptions import ( + ConnectorMissingCredentialError, + ConnectorValidationError, + CredentialExpiredError, + InsufficientPermissionsError, + UnexpectedValidationError, ) from common.data_source.interfaces import ( LoadConnector, PollConnector, - SecondsSinceUnixEpoch + SecondsSinceUnixEpoch, ) from common.data_source.models import ( Document, - TextSection, GenerateDocumentsOutput -) -from common.data_source.exceptions import ( - ConnectorValidationError, - CredentialExpiredError, - InsufficientPermissionsError, - UnexpectedValidationError, ConnectorMissingCredentialError -) -from common.data_source.models import ( - NotionPage, + GenerateDocumentsOutput, NotionBlock, - NotionSearchResponse + NotionPage, + NotionSearchResponse, + TextSection, ) from common.data_source.utils import ( - rl_requests, batch_generator, + datetime_from_string, fetch_notion_data, + filter_pages_by_time, properties_to_str, - filter_pages_by_time, 
datetime_from_string + rl_requests, ) @@ -61,11 +68,9 @@ class NotionConnector(LoadConnector, PollConnector): self.recursive_index_enabled = recursive_index_enabled or bool(root_page_id) @retry(tries=3, delay=1, backoff=2) - def _fetch_child_blocks( - self, block_id: str, cursor: Optional[str] = None - ) -> dict[str, Any] | None: + def _fetch_child_blocks(self, block_id: str, cursor: Optional[str] = None) -> dict[str, Any] | None: """Fetch all child blocks via the Notion API.""" - logging.debug(f"Fetching children of block with ID '{block_id}'") + logging.debug(f"[Notion]: Fetching children of block with ID {block_id}") block_url = f"https://api.notion.com/v1/blocks/{block_id}/children" query_params = {"start_cursor": cursor} if cursor else None @@ -79,49 +84,42 @@ class NotionConnector(LoadConnector, PollConnector): response.raise_for_status() return response.json() except Exception as e: - if hasattr(e, 'response') and e.response.status_code == 404: - logging.error( - f"Unable to access block with ID '{block_id}'. " - f"This is likely due to the block not being shared with the integration." - ) + if hasattr(e, "response") and e.response.status_code == 404: + logging.error(f"[Notion]: Unable to access block with ID {block_id}. 
This is likely due to the block not being shared with the integration.") return None else: - logging.exception(f"Error fetching blocks: {e}") + logging.exception(f"[Notion]: Error fetching blocks: {e}") raise @retry(tries=3, delay=1, backoff=2) def _fetch_page(self, page_id: str) -> NotionPage: """Fetch a page from its ID via the Notion API.""" - logging.debug(f"Fetching page for ID '{page_id}'") + logging.debug(f"[Notion]: Fetching page for ID {page_id}") page_url = f"https://api.notion.com/v1/pages/{page_id}" try: data = fetch_notion_data(page_url, self.headers, "GET") return NotionPage(**data) except Exception as e: - logging.warning(f"Failed to fetch page, trying database for ID '{page_id}': {e}") + logging.warning(f"[Notion]: Failed to fetch page, trying database for ID {page_id}: {e}") return self._fetch_database_as_page(page_id) @retry(tries=3, delay=1, backoff=2) def _fetch_database_as_page(self, database_id: str) -> NotionPage: """Attempt to fetch a database as a page.""" - logging.debug(f"Fetching database for ID '{database_id}' as a page") + logging.debug(f"[Notion]: Fetching database for ID {database_id} as a page") database_url = f"https://api.notion.com/v1/databases/{database_id}" data = fetch_notion_data(database_url, self.headers, "GET") database_name = data.get("title") - database_name = ( - database_name[0].get("text", {}).get("content") if database_name else None - ) + database_name = database_name[0].get("text", {}).get("content") if database_name else None return NotionPage(**data, database_name=database_name) @retry(tries=3, delay=1, backoff=2) - def _fetch_database( - self, database_id: str, cursor: Optional[str] = None - ) -> dict[str, Any]: + def _fetch_database(self, database_id: str, cursor: Optional[str] = None) -> dict[str, Any]: """Fetch a database from its ID via the Notion API.""" - logging.debug(f"Fetching database for ID '{database_id}'") + logging.debug(f"[Notion]: Fetching database for ID {database_id}") block_url = 
f"https://api.notion.com/v1/databases/{database_id}/query" body = {"start_cursor": cursor} if cursor else None @@ -129,17 +127,12 @@ class NotionConnector(LoadConnector, PollConnector): data = fetch_notion_data(block_url, self.headers, "POST", body) return data except Exception as e: - if hasattr(e, 'response') and e.response.status_code in [404, 400]: - logging.error( - f"Unable to access database with ID '{database_id}'. " - f"This is likely due to the database not being shared with the integration." - ) + if hasattr(e, "response") and e.response.status_code in [404, 400]: + logging.error(f"[Notion]: Unable to access database with ID {database_id}. This is likely due to the database not being shared with the integration.") return {"results": [], "next_cursor": None} raise - def _read_pages_from_database( - self, database_id: str - ) -> tuple[list[NotionBlock], list[str]]: + def _read_pages_from_database(self, database_id: str) -> tuple[list[NotionBlock], list[str]]: """Returns a list of top level blocks and all page IDs in the database.""" result_blocks: list[NotionBlock] = [] result_pages: list[str] = [] @@ -158,10 +151,10 @@ class NotionConnector(LoadConnector, PollConnector): if self.recursive_index_enabled: if obj_type == "page": - logging.debug(f"Found page with ID '{obj_id}' in database '{database_id}'") + logging.debug(f"[Notion]: Found page with ID {obj_id} in database {database_id}") result_pages.append(result["id"]) elif obj_type == "database": - logging.debug(f"Found database with ID '{obj_id}' in database '{database_id}'") + logging.debug(f"[Notion]: Found database with ID {obj_id} in database {database_id}") _, child_pages = self._read_pages_from_database(obj_id) result_pages.extend(child_pages) @@ -172,44 +165,229 @@ class NotionConnector(LoadConnector, PollConnector): return result_blocks, result_pages - def _read_blocks(self, base_block_id: str) -> tuple[list[NotionBlock], list[str]]: - """Reads all child blocks for the specified block, returns 
blocks and child page ids.""" + def _extract_rich_text(self, rich_text_array: list[dict[str, Any]]) -> str: + collected_text: list[str] = [] + for rich_text in rich_text_array: + content = "" + r_type = rich_text.get("type") + + if r_type == "equation": + expr = rich_text.get("equation", {}).get("expression") + if expr: + content = expr + elif r_type == "mention": + mention = rich_text.get("mention", {}) or {} + mention_type = mention.get("type") + mention_value = mention.get(mention_type, {}) if mention_type else {} + if mention_type == "date": + start = mention_value.get("start") + end = mention_value.get("end") + if start and end: + content = f"{start} - {end}" + elif start: + content = start + elif mention_type in {"page", "database"}: + content = mention_value.get("id", rich_text.get("plain_text", "")) + elif mention_type == "link_preview": + content = mention_value.get("url", rich_text.get("plain_text", "")) + else: + content = rich_text.get("plain_text", "") or str(mention_value) + else: + if rich_text.get("plain_text"): + content = rich_text["plain_text"] + elif "text" in rich_text and rich_text["text"].get("content"): + content = rich_text["text"]["content"] + + href = rich_text.get("href") + if content and href: + content = f"{content} ({href})" + + if content: + collected_text.append(content) + + return "".join(collected_text).strip() + + def _build_table_html(self, table_block_id: str) -> str | None: + rows: list[str] = [] + cursor = None + while True: + data = self._fetch_child_blocks(table_block_id, cursor) + if data is None: + break + + for result in data["results"]: + if result.get("type") != "table_row": + continue + cells_html: list[str] = [] + for cell in result["table_row"].get("cells", []): + cell_text = self._extract_rich_text(cell) + cell_html = html.escape(cell_text) if cell_text else "" + cells_html.append(f"{cell_html}") + rows.append(f"{''.join(cells_html)}") + + if data.get("next_cursor") is None: + break + cursor = data["next_cursor"] + 
+ if not rows: + return None + return "\n" + "\n".join(rows) + "\n
" + + def _download_file(self, url: str) -> bytes | None: + try: + response = rl_requests.get(url, timeout=60) + response.raise_for_status() + return response.content + except Exception as exc: + logging.warning(f"[Notion]: Failed to download Notion file from {url}: {exc}") + return None + + def _extract_file_metadata(self, result_obj: dict[str, Any], block_id: str) -> tuple[str | None, str, str | None]: + file_source_type = result_obj.get("type") + file_source = result_obj.get(file_source_type, {}) if file_source_type else {} + url = file_source.get("url") + + name = result_obj.get("name") or file_source.get("name") + if url and not name: + parsed_name = Path(urlparse(url).path).name + name = parsed_name or f"notion_file_{block_id}" + elif not name: + name = f"notion_file_{block_id}" + + caption = self._extract_rich_text(result_obj.get("caption", [])) if "caption" in result_obj else None + + return url, name, caption + + def _build_attachment_document( + self, + block_id: str, + url: str, + name: str, + caption: Optional[str], + page_last_edited_time: Optional[str], + ) -> Document | None: + file_bytes = self._download_file(url) + if file_bytes is None: + return None + + extension = Path(name).suffix or Path(urlparse(url).path).suffix or ".bin" + if extension and not extension.startswith("."): + extension = f".{extension}" + if not extension: + extension = ".bin" + + updated_at = datetime_from_string(page_last_edited_time) if page_last_edited_time else datetime.now(timezone.utc) + semantic_identifier = caption or name or f"Notion file {block_id}" + + return Document( + id=block_id, + blob=file_bytes, + source=DocumentSource.NOTION, + semantic_identifier=semantic_identifier, + extension=extension, + size_bytes=len(file_bytes), + doc_updated_at=updated_at, + ) + + def _read_blocks(self, base_block_id: str, page_last_edited_time: Optional[str] = None) -> tuple[list[NotionBlock], list[str], list[Document]]: result_blocks: list[NotionBlock] = [] child_pages: list[str] 
= [] + attachments: list[Document] = [] cursor = None while True: data = self._fetch_child_blocks(base_block_id, cursor) if data is None: - return result_blocks, child_pages + return result_blocks, child_pages, attachments for result in data["results"]: - logging.debug(f"Found child block for block with ID '{base_block_id}': {result}") + logging.debug(f"[Notion]: Found child block for block with ID {base_block_id}: {result}") result_block_id = result["id"] result_type = result["type"] result_obj = result[result_type] if result_type in ["ai_block", "unsupported", "external_object_instance_page"]: - logging.warning(f"Skipping unsupported block type '{result_type}'") + logging.warning(f"[Notion]: Skipping unsupported block type {result_type}") + continue + + if result_type == "table": + table_html = self._build_table_html(result_block_id) + if table_html: + result_blocks.append( + NotionBlock( + id=result_block_id, + text=table_html, + prefix="\n\n", + ) + ) + continue + + if result_type == "equation": + expr = result_obj.get("expression") + if expr: + result_blocks.append( + NotionBlock( + id=result_block_id, + text=expr, + prefix="\n", + ) + ) continue cur_result_text_arr = [] if "rich_text" in result_obj: - for rich_text in result_obj["rich_text"]: - if "text" in rich_text: - text = rich_text["text"]["content"] - cur_result_text_arr.append(text) + text = self._extract_rich_text(result_obj["rich_text"]) + if text: + cur_result_text_arr.append(text) + + if result_type == "bulleted_list_item": + if cur_result_text_arr: + cur_result_text_arr[0] = f"- {cur_result_text_arr[0]}" + else: + cur_result_text_arr = ["- "] + + if result_type == "numbered_list_item": + if cur_result_text_arr: + cur_result_text_arr[0] = f"1. {cur_result_text_arr[0]}" + else: + cur_result_text_arr = ["1. 
"] + + if result_type == "to_do": + checked = result_obj.get("checked") + checkbox_prefix = "[x]" if checked else "[ ]" + if cur_result_text_arr: + cur_result_text_arr = [f"{checkbox_prefix} {cur_result_text_arr[0]}"] + cur_result_text_arr[1:] + else: + cur_result_text_arr = [checkbox_prefix] + + if result_type in {"file", "image", "pdf", "video", "audio"}: + file_url, file_name, caption = self._extract_file_metadata(result_obj, result_block_id) + if file_url: + attachment_doc = self._build_attachment_document( + block_id=result_block_id, + url=file_url, + name=file_name, + caption=caption, + page_last_edited_time=page_last_edited_time, + ) + if attachment_doc: + attachments.append(attachment_doc) + + attachment_label = caption or file_name + if attachment_label: + cur_result_text_arr.append(f"{result_type.capitalize()}: {attachment_label}") if result["has_children"]: if result_type == "child_page": child_pages.append(result_block_id) else: - logging.debug(f"Entering sub-block: {result_block_id}") - subblocks, subblock_child_pages = self._read_blocks(result_block_id) - logging.debug(f"Finished sub-block: {result_block_id}") + logging.debug(f"[Notion]: Entering sub-block: {result_block_id}") + subblocks, subblock_child_pages, subblock_attachments = self._read_blocks(result_block_id, page_last_edited_time) + logging.debug(f"[Notion]: Finished sub-block: {result_block_id}") result_blocks.extend(subblocks) child_pages.extend(subblock_child_pages) + attachments.extend(subblock_attachments) if result_type == "child_database": inner_blocks, inner_child_pages = self._read_pages_from_database(result_block_id) @@ -231,7 +409,7 @@ class NotionConnector(LoadConnector, PollConnector): cursor = data["next_cursor"] - return result_blocks, child_pages + return result_blocks, child_pages, attachments def _read_page_title(self, page: NotionPage) -> Optional[str]: """Extracts the title from a Notion page.""" @@ -245,9 +423,7 @@ class NotionConnector(LoadConnector, PollConnector): 
return None - def _read_pages( - self, pages: list[NotionPage] - ) -> Generator[Document, None, None]: + def _read_pages(self, pages: list[NotionPage], start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[Document, None, None]: """Reads pages for rich text content and generates Documents.""" all_child_page_ids: list[str] = [] @@ -255,11 +431,17 @@ class NotionConnector(LoadConnector, PollConnector): if isinstance(page, dict): page = NotionPage(**page) if page.id in self.indexed_pages: - logging.debug(f"Already indexed page with ID '{page.id}'. Skipping.") + logging.debug(f"[Notion]: Already indexed page with ID {page.id}. Skipping.") continue - logging.info(f"Reading page with ID '{page.id}', with url {page.url}") - page_blocks, child_page_ids = self._read_blocks(page.id) + if start is not None and end is not None: + page_ts = datetime_from_string(page.last_edited_time).timestamp() + if not (page_ts > start and page_ts <= end): + logging.debug(f"[Notion]: Skipping page {page.id} outside polling window.") + continue + + logging.info(f"[Notion]: Reading page with ID {page.id}, with url {page.url}") + page_blocks, child_page_ids, attachment_docs = self._read_blocks(page.id, page.last_edited_time) all_child_page_ids.extend(child_page_ids) self.indexed_pages.add(page.id) @@ -268,14 +450,12 @@ class NotionConnector(LoadConnector, PollConnector): if not page_blocks: if not raw_page_title: - logging.warning(f"No blocks OR title found for page with ID '{page.id}'. Skipping.") + logging.warning(f"[Notion]: No blocks OR title found for page with ID {page.id}. 
Skipping.") continue text = page_title if page.properties: - text += "\n\n" + "\n".join( - [f"{key}: {value}" for key, value in page.properties.items()] - ) + text += "\n\n" + "\n".join([f"{key}: {value}" for key, value in page.properties.items()]) sections = [TextSection(link=page.url, text=text)] else: sections = [ @@ -286,45 +466,39 @@ class NotionConnector(LoadConnector, PollConnector): for block in page_blocks ] - blob = ("\n".join([sec.text for sec in sections])).encode("utf-8") + joined_text = "\n".join(sec.text for sec in sections) + blob = joined_text.encode("utf-8") yield Document( - id=page.id, - blob=blob, - source=DocumentSource.NOTION, - semantic_identifier=page_title, - extension=".txt", - size_bytes=len(blob), - doc_updated_at=datetime_from_string(page.last_edited_time) + id=page.id, blob=blob, source=DocumentSource.NOTION, semantic_identifier=page_title, extension=".txt", size_bytes=len(blob), doc_updated_at=datetime_from_string(page.last_edited_time) ) + for attachment_doc in attachment_docs: + yield attachment_doc + if self.recursive_index_enabled and all_child_page_ids: for child_page_batch_ids in batch_generator(all_child_page_ids, INDEX_BATCH_SIZE): - child_page_batch = [ - self._fetch_page(page_id) - for page_id in child_page_batch_ids - if page_id not in self.indexed_pages - ] - yield from self._read_pages(child_page_batch) + child_page_batch = [self._fetch_page(page_id) for page_id in child_page_batch_ids if page_id not in self.indexed_pages] + yield from self._read_pages(child_page_batch, start, end) @retry(tries=3, delay=1, backoff=2) def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse: """Search for pages from a Notion database.""" - logging.debug(f"Searching for pages in Notion with query_dict: {query_dict}") + logging.debug(f"[Notion]: Searching for pages in Notion with query_dict: {query_dict}") data = fetch_notion_data("https://api.notion.com/v1/search", self.headers, "POST", query_dict) return 
NotionSearchResponse(**data) - def _recursive_load(self) -> Generator[list[Document], None, None]: + def _recursive_load(self, start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[list[Document], None, None]: """Recursively load pages starting from root page ID.""" if self.root_page_id is None or not self.recursive_index_enabled: raise RuntimeError("Recursive page lookup is not enabled") - logging.info(f"Recursively loading pages from Notion based on root page with ID: {self.root_page_id}") + logging.info(f"[Notion]: Recursively loading pages from Notion based on root page with ID: {self.root_page_id}") pages = [self._fetch_page(page_id=self.root_page_id)] - yield from batch_generator(self._read_pages(pages), self.batch_size) + yield from batch_generator(self._read_pages(pages, start, end), self.batch_size) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: """Applies integration token to headers.""" - self.headers["Authorization"] = f'Bearer {credentials["notion_integration_token"]}' + self.headers["Authorization"] = f"Bearer {credentials['notion_integration_token']}" return None def load_from_state(self) -> GenerateDocumentsOutput: @@ -348,12 +522,10 @@ class NotionConnector(LoadConnector, PollConnector): else: break - def poll_source( - self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch - ) -> GenerateDocumentsOutput: + def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput: """Poll Notion for updated pages within a time period.""" if self.recursive_index_enabled and self.root_page_id: - yield from self._recursive_load() + yield from self._recursive_load(start, end) return query_dict = { @@ -367,7 +539,7 @@ class NotionConnector(LoadConnector, PollConnector): pages = filter_pages_by_time(db_res.results, start, end, "last_edited_time") if pages: - yield from batch_generator(self._read_pages(pages), self.batch_size) + yield 
from batch_generator(self._read_pages(pages, start, end), self.batch_size) if db_res.has_more: query_dict["start_cursor"] = db_res.next_cursor else: break From 971c1bcba707013143e788c514261629a4c6a221 Mon Sep 17 00:00:00 2001 From: coding Date: Fri, 21 Nov 2025 09:33:36 +0800 Subject: [PATCH 5/6] Fix: missing parameters in by_plaintext method for PDF naive mode (#11408) ### What problem does this PR solve? Fix: missing parameters in by_plaintext method for PDF naive mode ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: lih --- rag/app/book.py | 1 + rag/app/laws.py | 1 + rag/app/manual.py | 1 + rag/app/one.py | 1 + rag/app/presentation.py | 1 + 5 files changed, 5 insertions(+) diff --git a/rag/app/book.py b/rag/app/book.py index 5ea28d40d..5bdaec72d 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -113,6 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/laws.py b/rag/app/laws.py index dd97e4e3a..ba2592833 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -172,6 +172,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/manual.py b/rag/app/manual.py index 124864041..b3a4ae38d 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -213,6 +213,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, parse_method = "manual", **kwargs ) diff --git a/rag/app/one.py b/rag/app/one.py index 5574aaa51..7cd1bb785 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -99,6 +99,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff 
--git a/rag/app/presentation.py b/rag/app/presentation.py index cd1d308ec..6a872528f 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -142,6 +142,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) From 653b78595834762b019bc8b29ad845bb5e7cdaa0 Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Fri, 21 Nov 2025 09:33:50 +0800 Subject: [PATCH 6/6] Fix: Modify the style of the user center #10703 (#11419) ### What problem does this PR solve? Fix: Modify the style of the user center ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/components/back-button/index.tsx | 5 +- web/src/components/confirm-delete-dialog.tsx | 3 +- web/src/components/dynamic-form.tsx | 20 +- web/src/components/edit-tag/index.tsx | 4 +- web/src/components/file-uploader.tsx | 12 +- .../components/originui/password-input.tsx | 14 - .../originui/select-with-search.tsx | 20 +- web/src/components/ui/command.tsx | 12 +- web/src/components/ui/input.tsx | 9 +- web/src/components/ui/modal/modal.tsx | 23 +- web/src/components/ui/multi-select.tsx | 8 +- web/src/components/ui/select.tsx | 4 +- web/src/components/ui/textarea.tsx | 2 +- web/src/components/ui/tooltip.tsx | 4 +- web/src/hooks/common-hooks.tsx | 30 +- web/src/locales/en.ts | 2 + web/src/locales/zh.ts | 2 + web/src/pages/dataset/sidebar/index.tsx | 2 +- .../data-source/add-datasource-modal.tsx | 1 + .../data-source/component/dynamic-form.tsx | 725 ------------------ .../user-setting/data-source/contant.tsx | 95 --- .../data-source-detail-page/index.tsx | 6 +- .../user-setting/mcp/edit-mcp-dialog.tsx | 2 +- web/src/pages/user-setting/mcp/index.tsx | 2 +- .../setting-model/components/un-add-model.tsx | 4 +- .../user-setting/setting-team/user-table.tsx | 4 +- web/src/pages/user-setting/sidebar/index.tsx | 4 +- 27 files changed, 122 insertions(+), 897 deletions(-) 
delete mode 100644 web/src/pages/user-setting/data-source/component/dynamic-form.tsx diff --git a/web/src/components/back-button/index.tsx b/web/src/components/back-button/index.tsx index c790d6882..118042128 100644 --- a/web/src/components/back-button/index.tsx +++ b/web/src/components/back-button/index.tsx @@ -29,7 +29,10 @@ const BackButton: React.FC = ({ return ( diff --git a/web/src/components/edit-tag/index.tsx b/web/src/components/edit-tag/index.tsx index 1921853d0..a05fadad6 100644 --- a/web/src/components/edit-tag/index.tsx +++ b/web/src/components/edit-tag/index.tsx @@ -102,8 +102,8 @@ const EditTag = React.forwardRef( {Array.isArray(tagChild) && tagChild.length > 0 && <>{tagChild}} {!inputVisible && ( ); diff --git a/web/src/components/originui/select-with-search.tsx b/web/src/components/originui/select-with-search.tsx index 2d095f157..e3cec6240 100644 --- a/web/src/components/originui/select-with-search.tsx +++ b/web/src/components/originui/select-with-search.tsx @@ -140,7 +140,7 @@ export const SelectWithSearch = forwardRef< ref={ref} disabled={disabled} className={cn( - '!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto', + '!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto group', triggerClassName, )} > @@ -155,12 +155,12 @@ export const SelectWithSearch = forwardRef< {value && allowClear && ( <> )} @@ -173,12 +173,17 @@ export const SelectWithSearch = forwardRef< - - - + + {options && options.length > 0 && ( + + )} + {t('common.noDataFound')} {options.map((group, idx) => { if (group.options) { @@ -209,6 +214,7 @@ export const SelectWithSearch = forwardRef< value={group.value} disabled={group.disabled} onSelect={handleSelect} + className="min-h-10" > {group.label} diff --git 
a/web/src/components/ui/command.tsx b/web/src/components/ui/command.tsx index 10caef77d..b96c82ba2 100644 --- a/web/src/components/ui/command.tsx +++ b/web/src/components/ui/command.tsx @@ -39,7 +39,10 @@ const CommandInput = React.forwardRef< React.ElementRef, React.ComponentPropsWithoutRef >(({ className, ...props }, ref) => ( -
+
e.stopPropagation()} onMouseEnter={(e) => e.currentTarget.focus()} tabIndex={-1} @@ -96,7 +102,7 @@ const CommandGroup = React.forwardRef< , 'prefix'> { @@ -50,6 +49,8 @@ const Input = React.forwardRef( 'pr-12': !!suffix || isPasswordInput, 'pr-24': !!suffix && isPasswordInput, }, + type === 'number' && + '[appearance:textfield] [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none', className, )} value={inputValue ?? ''} @@ -77,10 +78,10 @@ const Input = React.forwardRef( )} {isPasswordInput && ( - + )}
); diff --git a/web/src/components/ui/modal/modal.tsx b/web/src/components/ui/modal/modal.tsx index af516b1e6..c40e5f90b 100644 --- a/web/src/components/ui/modal/modal.tsx +++ b/web/src/components/ui/modal/modal.tsx @@ -27,7 +27,10 @@ export interface ModalProps { okText?: ReactNode | string; onOk?: () => void; onCancel?: () => void; + okButtonClassName?: string; + cancelButtonClassName?: string; disabled?: boolean; + style?: React.CSSProperties; } export interface ModalType extends FC { show: typeof modalIns.show; @@ -56,7 +59,10 @@ const Modal: ModalType = ({ confirmLoading, cancelText, okText, + okButtonClassName, + cancelButtonClassName, disabled = false, + style, }) => { const sizeClasses = { small: 'max-w-md', @@ -111,7 +117,10 @@ const Modal: ModalType = ({ @@ -122,6 +131,7 @@ const Modal: ModalType = ({ className={cn( 'px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90', { 'cursor-not-allowed': disabled }, + okButtonClassName, )} > {confirmLoading && ( @@ -153,23 +163,26 @@ const Modal: ModalType = ({ handleOk, showfooter, footerClassName, + okButtonClassName, + cancelButtonClassName, ]); return ( maskClosable && onOpenChange?.(false)} > e.stopPropagation()} > {/* title */} {(title || closable) && (