From 820934fc779609247a03a6eaa6886149b4fcd691 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Thu, 20 Nov 2025 19:51:25 +0800 Subject: [PATCH 01/12] Fix: no result if metadata returns none. (#11412) ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- agent/tools/retrieval.py | 6 +++--- api/apps/chunk_app.py | 4 ++-- api/apps/sdk/doc.py | 2 ++ api/apps/sdk/session.py | 12 ++++++------ api/db/services/dialog_service.py | 15 ++++++++------- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/agent/tools/retrieval.py b/agent/tools/retrieval.py index c3a01e517..fd9096cb6 100644 --- a/agent/tools/retrieval.py +++ b/agent/tools/retrieval.py @@ -137,7 +137,7 @@ class Retrieval(ToolBase, ABC): if not doc_ids: doc_ids = None elif self._param.meta_data_filter.get("method") == "manual": - filters=self._param.meta_data_filter["manual"] + filters = self._param.meta_data_filter["manual"] for flt in filters: pat = re.compile(self.variable_ref_patt) s = flt["value"] @@ -166,8 +166,8 @@ class Retrieval(ToolBase, ABC): out_parts.append(s[last:]) flt["value"] = "".join(out_parts) doc_ids.extend(meta_filter(metas, filters, self._param.meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if filters and not doc_ids: + doc_ids = ["-999"] if self._param.cross_languages: query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages) diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py index e121bcba7..b43fb9af1 100644 --- a/api/apps/chunk_app.py +++ b/api/apps/chunk_app.py @@ -311,8 +311,8 @@ async def retrieval_test(): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] try: tenants = UserTenantService.query(user_id=current_user.id) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py 
index 84300ac3c..30fbd835e 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1445,6 +1445,8 @@ async def retrieval_test(tenant_id): metadata_condition = req.get("metadata_condition", {}) or {} metas = DocumentService.get_meta_by_kbs(kb_ids) doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")) + if metadata_condition and not doc_ids: + doc_ids = ["-999"] similarity_threshold = float(req.get("similarity_threshold", 0.2)) vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3)) top = int(req.get("top_k", 1024)) diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index 533375622..074401ede 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -446,8 +446,8 @@ async def agent_completions(tenant_id, agent_id): if req.get("stream", True): - def generate(): - for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): + async def generate(): + async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): if isinstance(answer, str): try: ans = json.loads(answer[5:]) # remove "data:" @@ -471,7 +471,7 @@ async def agent_completions(tenant_id, agent_id): full_content = "" reference = {} final_ans = "" - for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): + async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): try: ans = json.loads(answer[5:]) @@ -873,7 +873,7 @@ async def agent_bot_completions(agent_id): resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") return resp - for answer in agent_completion(objs[0].tenant_id, agent_id, **req): + async for answer in agent_completion(objs[0].tenant_id, agent_id, **req): return get_result(data=answer) @@ -981,8 +981,8 @@ async def retrieval_test_embedded(): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], 
meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] try: tenants = UserTenantService.query(user_id=tenant_id) diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index db878574d..0a09ea532 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -415,9 +415,10 @@ def chat(dialog, messages, stream=True, **kwargs): if not attachments: attachments = None elif dialog.meta_data_filter.get("method") == "manual": - attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"], dialog.meta_data_filter.get("logic", "and"))) - if not attachments: - attachments = None + conds = dialog.meta_data_filter["manual"] + attachments.extend(meta_filter(metas, conds, dialog.meta_data_filter.get("logic", "and"))) + if conds and not attachments: + attachments = ["-999"] if prompt_config.get("keyword", False): questions[-1] += keyword_extraction(chat_mdl, questions[-1]) @@ -787,8 +788,8 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] kbinfos = retriever.retrieval( question=question, @@ -862,8 +863,8 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] ranks = settings.retriever.retrieval( question=question, From 065917bf1c22300a0ddaffc04a4bdaf608e10a32 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Thu, 20 Nov 2025 19:51:37 +0800 Subject: [PATCH 
02/12] Feat: enriches Notion connector (#11414) ### What problem does this PR solve? Enriches rich text (links, mentions, equations), flags to-do blocks with [x]/[ ], captures block-level equations, builds table HTML, downloads attachments. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- common/data_source/notion_connector.py | 356 ++++++++++++++++++------- 1 file changed, 264 insertions(+), 92 deletions(-) diff --git a/common/data_source/notion_connector.py b/common/data_source/notion_connector.py index 8c6a522ad..e29bbbe76 100644 --- a/common/data_source/notion_connector.py +++ b/common/data_source/notion_connector.py @@ -1,38 +1,45 @@ +import html import logging from collections.abc import Generator +from datetime import datetime, timezone +from pathlib import Path from typing import Any, Optional +from urllib.parse import urlparse + from retry import retry from common.data_source.config import ( INDEX_BATCH_SIZE, - DocumentSource, NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP + NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP, + DocumentSource, +) +from common.data_source.exceptions import ( + ConnectorMissingCredentialError, + ConnectorValidationError, + CredentialExpiredError, + InsufficientPermissionsError, + UnexpectedValidationError, ) from common.data_source.interfaces import ( LoadConnector, PollConnector, - SecondsSinceUnixEpoch + SecondsSinceUnixEpoch, ) from common.data_source.models import ( Document, - TextSection, GenerateDocumentsOutput -) -from common.data_source.exceptions import ( - ConnectorValidationError, - CredentialExpiredError, - InsufficientPermissionsError, - UnexpectedValidationError, ConnectorMissingCredentialError -) -from common.data_source.models import ( - NotionPage, + GenerateDocumentsOutput, NotionBlock, - NotionSearchResponse + NotionPage, + NotionSearchResponse, + TextSection, ) from common.data_source.utils import ( - rl_requests, batch_generator, + datetime_from_string, 
fetch_notion_data, + filter_pages_by_time, properties_to_str, - filter_pages_by_time, datetime_from_string + rl_requests, ) @@ -61,11 +68,9 @@ class NotionConnector(LoadConnector, PollConnector): self.recursive_index_enabled = recursive_index_enabled or bool(root_page_id) @retry(tries=3, delay=1, backoff=2) - def _fetch_child_blocks( - self, block_id: str, cursor: Optional[str] = None - ) -> dict[str, Any] | None: + def _fetch_child_blocks(self, block_id: str, cursor: Optional[str] = None) -> dict[str, Any] | None: """Fetch all child blocks via the Notion API.""" - logging.debug(f"Fetching children of block with ID '{block_id}'") + logging.debug(f"[Notion]: Fetching children of block with ID {block_id}") block_url = f"https://api.notion.com/v1/blocks/{block_id}/children" query_params = {"start_cursor": cursor} if cursor else None @@ -79,49 +84,42 @@ class NotionConnector(LoadConnector, PollConnector): response.raise_for_status() return response.json() except Exception as e: - if hasattr(e, 'response') and e.response.status_code == 404: - logging.error( - f"Unable to access block with ID '{block_id}'. " - f"This is likely due to the block not being shared with the integration." - ) + if hasattr(e, "response") and e.response.status_code == 404: + logging.error(f"[Notion]: Unable to access block with ID {block_id}. 
This is likely due to the block not being shared with the integration.") return None else: - logging.exception(f"Error fetching blocks: {e}") + logging.exception(f"[Notion]: Error fetching blocks: {e}") raise @retry(tries=3, delay=1, backoff=2) def _fetch_page(self, page_id: str) -> NotionPage: """Fetch a page from its ID via the Notion API.""" - logging.debug(f"Fetching page for ID '{page_id}'") + logging.debug(f"[Notion]: Fetching page for ID {page_id}") page_url = f"https://api.notion.com/v1/pages/{page_id}" try: data = fetch_notion_data(page_url, self.headers, "GET") return NotionPage(**data) except Exception as e: - logging.warning(f"Failed to fetch page, trying database for ID '{page_id}': {e}") + logging.warning(f"[Notion]: Failed to fetch page, trying database for ID {page_id}: {e}") return self._fetch_database_as_page(page_id) @retry(tries=3, delay=1, backoff=2) def _fetch_database_as_page(self, database_id: str) -> NotionPage: """Attempt to fetch a database as a page.""" - logging.debug(f"Fetching database for ID '{database_id}' as a page") + logging.debug(f"[Notion]: Fetching database for ID {database_id} as a page") database_url = f"https://api.notion.com/v1/databases/{database_id}" data = fetch_notion_data(database_url, self.headers, "GET") database_name = data.get("title") - database_name = ( - database_name[0].get("text", {}).get("content") if database_name else None - ) + database_name = database_name[0].get("text", {}).get("content") if database_name else None return NotionPage(**data, database_name=database_name) @retry(tries=3, delay=1, backoff=2) - def _fetch_database( - self, database_id: str, cursor: Optional[str] = None - ) -> dict[str, Any]: + def _fetch_database(self, database_id: str, cursor: Optional[str] = None) -> dict[str, Any]: """Fetch a database from its ID via the Notion API.""" - logging.debug(f"Fetching database for ID '{database_id}'") + logging.debug(f"[Notion]: Fetching database for ID {database_id}") block_url = 
f"https://api.notion.com/v1/databases/{database_id}/query" body = {"start_cursor": cursor} if cursor else None @@ -129,17 +127,12 @@ class NotionConnector(LoadConnector, PollConnector): data = fetch_notion_data(block_url, self.headers, "POST", body) return data except Exception as e: - if hasattr(e, 'response') and e.response.status_code in [404, 400]: - logging.error( - f"Unable to access database with ID '{database_id}'. " - f"This is likely due to the database not being shared with the integration." - ) + if hasattr(e, "response") and e.response.status_code in [404, 400]: + logging.error(f"[Notion]: Unable to access database with ID {database_id}. This is likely due to the database not being shared with the integration.") return {"results": [], "next_cursor": None} raise - def _read_pages_from_database( - self, database_id: str - ) -> tuple[list[NotionBlock], list[str]]: + def _read_pages_from_database(self, database_id: str) -> tuple[list[NotionBlock], list[str]]: """Returns a list of top level blocks and all page IDs in the database.""" result_blocks: list[NotionBlock] = [] result_pages: list[str] = [] @@ -158,10 +151,10 @@ class NotionConnector(LoadConnector, PollConnector): if self.recursive_index_enabled: if obj_type == "page": - logging.debug(f"Found page with ID '{obj_id}' in database '{database_id}'") + logging.debug(f"[Notion]: Found page with ID {obj_id} in database {database_id}") result_pages.append(result["id"]) elif obj_type == "database": - logging.debug(f"Found database with ID '{obj_id}' in database '{database_id}'") + logging.debug(f"[Notion]: Found database with ID {obj_id} in database {database_id}") _, child_pages = self._read_pages_from_database(obj_id) result_pages.extend(child_pages) @@ -172,44 +165,229 @@ class NotionConnector(LoadConnector, PollConnector): return result_blocks, result_pages - def _read_blocks(self, base_block_id: str) -> tuple[list[NotionBlock], list[str]]: - """Reads all child blocks for the specified block, returns 
blocks and child page ids.""" + def _extract_rich_text(self, rich_text_array: list[dict[str, Any]]) -> str: + collected_text: list[str] = [] + for rich_text in rich_text_array: + content = "" + r_type = rich_text.get("type") + + if r_type == "equation": + expr = rich_text.get("equation", {}).get("expression") + if expr: + content = expr + elif r_type == "mention": + mention = rich_text.get("mention", {}) or {} + mention_type = mention.get("type") + mention_value = mention.get(mention_type, {}) if mention_type else {} + if mention_type == "date": + start = mention_value.get("start") + end = mention_value.get("end") + if start and end: + content = f"{start} - {end}" + elif start: + content = start + elif mention_type in {"page", "database"}: + content = mention_value.get("id", rich_text.get("plain_text", "")) + elif mention_type == "link_preview": + content = mention_value.get("url", rich_text.get("plain_text", "")) + else: + content = rich_text.get("plain_text", "") or str(mention_value) + else: + if rich_text.get("plain_text"): + content = rich_text["plain_text"] + elif "text" in rich_text and rich_text["text"].get("content"): + content = rich_text["text"]["content"] + + href = rich_text.get("href") + if content and href: + content = f"{content} ({href})" + + if content: + collected_text.append(content) + + return "".join(collected_text).strip() + + def _build_table_html(self, table_block_id: str) -> str | None: + rows: list[str] = [] + cursor = None + while True: + data = self._fetch_child_blocks(table_block_id, cursor) + if data is None: + break + + for result in data["results"]: + if result.get("type") != "table_row": + continue + cells_html: list[str] = [] + for cell in result["table_row"].get("cells", []): + cell_text = self._extract_rich_text(cell) + cell_html = html.escape(cell_text) if cell_text else "" + cells_html.append(f"{cell_html}") + rows.append(f"{''.join(cells_html)}") + + if data.get("next_cursor") is None: + break + cursor = data["next_cursor"] + 
+ if not rows: + return None + return "\n" + "\n".join(rows) + "\n
" + + def _download_file(self, url: str) -> bytes | None: + try: + response = rl_requests.get(url, timeout=60) + response.raise_for_status() + return response.content + except Exception as exc: + logging.warning(f"[Notion]: Failed to download Notion file from {url}: {exc}") + return None + + def _extract_file_metadata(self, result_obj: dict[str, Any], block_id: str) -> tuple[str | None, str, str | None]: + file_source_type = result_obj.get("type") + file_source = result_obj.get(file_source_type, {}) if file_source_type else {} + url = file_source.get("url") + + name = result_obj.get("name") or file_source.get("name") + if url and not name: + parsed_name = Path(urlparse(url).path).name + name = parsed_name or f"notion_file_{block_id}" + elif not name: + name = f"notion_file_{block_id}" + + caption = self._extract_rich_text(result_obj.get("caption", [])) if "caption" in result_obj else None + + return url, name, caption + + def _build_attachment_document( + self, + block_id: str, + url: str, + name: str, + caption: Optional[str], + page_last_edited_time: Optional[str], + ) -> Document | None: + file_bytes = self._download_file(url) + if file_bytes is None: + return None + + extension = Path(name).suffix or Path(urlparse(url).path).suffix or ".bin" + if extension and not extension.startswith("."): + extension = f".{extension}" + if not extension: + extension = ".bin" + + updated_at = datetime_from_string(page_last_edited_time) if page_last_edited_time else datetime.now(timezone.utc) + semantic_identifier = caption or name or f"Notion file {block_id}" + + return Document( + id=block_id, + blob=file_bytes, + source=DocumentSource.NOTION, + semantic_identifier=semantic_identifier, + extension=extension, + size_bytes=len(file_bytes), + doc_updated_at=updated_at, + ) + + def _read_blocks(self, base_block_id: str, page_last_edited_time: Optional[str] = None) -> tuple[list[NotionBlock], list[str], list[Document]]: result_blocks: list[NotionBlock] = [] child_pages: list[str] 
= [] + attachments: list[Document] = [] cursor = None while True: data = self._fetch_child_blocks(base_block_id, cursor) if data is None: - return result_blocks, child_pages + return result_blocks, child_pages, attachments for result in data["results"]: - logging.debug(f"Found child block for block with ID '{base_block_id}': {result}") + logging.debug(f"[Notion]: Found child block for block with ID {base_block_id}: {result}") result_block_id = result["id"] result_type = result["type"] result_obj = result[result_type] if result_type in ["ai_block", "unsupported", "external_object_instance_page"]: - logging.warning(f"Skipping unsupported block type '{result_type}'") + logging.warning(f"[Notion]: Skipping unsupported block type {result_type}") + continue + + if result_type == "table": + table_html = self._build_table_html(result_block_id) + if table_html: + result_blocks.append( + NotionBlock( + id=result_block_id, + text=table_html, + prefix="\n\n", + ) + ) + continue + + if result_type == "equation": + expr = result_obj.get("expression") + if expr: + result_blocks.append( + NotionBlock( + id=result_block_id, + text=expr, + prefix="\n", + ) + ) continue cur_result_text_arr = [] if "rich_text" in result_obj: - for rich_text in result_obj["rich_text"]: - if "text" in rich_text: - text = rich_text["text"]["content"] - cur_result_text_arr.append(text) + text = self._extract_rich_text(result_obj["rich_text"]) + if text: + cur_result_text_arr.append(text) + + if result_type == "bulleted_list_item": + if cur_result_text_arr: + cur_result_text_arr[0] = f"- {cur_result_text_arr[0]}" + else: + cur_result_text_arr = ["- "] + + if result_type == "numbered_list_item": + if cur_result_text_arr: + cur_result_text_arr[0] = f"1. {cur_result_text_arr[0]}" + else: + cur_result_text_arr = ["1. 
"] + + if result_type == "to_do": + checked = result_obj.get("checked") + checkbox_prefix = "[x]" if checked else "[ ]" + if cur_result_text_arr: + cur_result_text_arr = [f"{checkbox_prefix} {cur_result_text_arr[0]}"] + cur_result_text_arr[1:] + else: + cur_result_text_arr = [checkbox_prefix] + + if result_type in {"file", "image", "pdf", "video", "audio"}: + file_url, file_name, caption = self._extract_file_metadata(result_obj, result_block_id) + if file_url: + attachment_doc = self._build_attachment_document( + block_id=result_block_id, + url=file_url, + name=file_name, + caption=caption, + page_last_edited_time=page_last_edited_time, + ) + if attachment_doc: + attachments.append(attachment_doc) + + attachment_label = caption or file_name + if attachment_label: + cur_result_text_arr.append(f"{result_type.capitalize()}: {attachment_label}") if result["has_children"]: if result_type == "child_page": child_pages.append(result_block_id) else: - logging.debug(f"Entering sub-block: {result_block_id}") - subblocks, subblock_child_pages = self._read_blocks(result_block_id) - logging.debug(f"Finished sub-block: {result_block_id}") + logging.debug(f"[Notion]: Entering sub-block: {result_block_id}") + subblocks, subblock_child_pages, subblock_attachments = self._read_blocks(result_block_id, page_last_edited_time) + logging.debug(f"[Notion]: Finished sub-block: {result_block_id}") result_blocks.extend(subblocks) child_pages.extend(subblock_child_pages) + attachments.extend(subblock_attachments) if result_type == "child_database": inner_blocks, inner_child_pages = self._read_pages_from_database(result_block_id) @@ -231,7 +409,7 @@ class NotionConnector(LoadConnector, PollConnector): cursor = data["next_cursor"] - return result_blocks, child_pages + return result_blocks, child_pages, attachments def _read_page_title(self, page: NotionPage) -> Optional[str]: """Extracts the title from a Notion page.""" @@ -245,9 +423,7 @@ class NotionConnector(LoadConnector, PollConnector): 
return None - def _read_pages( - self, pages: list[NotionPage] - ) -> Generator[Document, None, None]: + def _read_pages(self, pages: list[NotionPage], start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[Document, None, None]: """Reads pages for rich text content and generates Documents.""" all_child_page_ids: list[str] = [] @@ -255,11 +431,17 @@ class NotionConnector(LoadConnector, PollConnector): if isinstance(page, dict): page = NotionPage(**page) if page.id in self.indexed_pages: - logging.debug(f"Already indexed page with ID '{page.id}'. Skipping.") + logging.debug(f"[Notion]: Already indexed page with ID {page.id}. Skipping.") continue - logging.info(f"Reading page with ID '{page.id}', with url {page.url}") - page_blocks, child_page_ids = self._read_blocks(page.id) + if start is not None and end is not None: + page_ts = datetime_from_string(page.last_edited_time).timestamp() + if not (page_ts > start and page_ts <= end): + logging.debug(f"[Notion]: Skipping page {page.id} outside polling window.") + continue + + logging.info(f"[Notion]: Reading page with ID {page.id}, with url {page.url}") + page_blocks, child_page_ids, attachment_docs = self._read_blocks(page.id, page.last_edited_time) all_child_page_ids.extend(child_page_ids) self.indexed_pages.add(page.id) @@ -268,14 +450,12 @@ class NotionConnector(LoadConnector, PollConnector): if not page_blocks: if not raw_page_title: - logging.warning(f"No blocks OR title found for page with ID '{page.id}'. Skipping.") + logging.warning(f"[Notion]: No blocks OR title found for page with ID {page.id}. 
Skipping.") continue text = page_title if page.properties: - text += "\n\n" + "\n".join( - [f"{key}: {value}" for key, value in page.properties.items()] - ) + text += "\n\n" + "\n".join([f"{key}: {value}" for key, value in page.properties.items()]) sections = [TextSection(link=page.url, text=text)] else: sections = [ @@ -286,45 +466,39 @@ class NotionConnector(LoadConnector, PollConnector): for block in page_blocks ] - blob = ("\n".join([sec.text for sec in sections])).encode("utf-8") + joined_text = "\n".join(sec.text for sec in sections) + blob = joined_text.encode("utf-8") yield Document( - id=page.id, - blob=blob, - source=DocumentSource.NOTION, - semantic_identifier=page_title, - extension=".txt", - size_bytes=len(blob), - doc_updated_at=datetime_from_string(page.last_edited_time) + id=page.id, blob=blob, source=DocumentSource.NOTION, semantic_identifier=page_title, extension=".txt", size_bytes=len(blob), doc_updated_at=datetime_from_string(page.last_edited_time) ) + for attachment_doc in attachment_docs: + yield attachment_doc + if self.recursive_index_enabled and all_child_page_ids: for child_page_batch_ids in batch_generator(all_child_page_ids, INDEX_BATCH_SIZE): - child_page_batch = [ - self._fetch_page(page_id) - for page_id in child_page_batch_ids - if page_id not in self.indexed_pages - ] - yield from self._read_pages(child_page_batch) + child_page_batch = [self._fetch_page(page_id) for page_id in child_page_batch_ids if page_id not in self.indexed_pages] + yield from self._read_pages(child_page_batch, start, end) @retry(tries=3, delay=1, backoff=2) def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse: """Search for pages from a Notion database.""" - logging.debug(f"Searching for pages in Notion with query_dict: {query_dict}") + logging.debug(f"[Notion]: Searching for pages in Notion with query_dict: {query_dict}") data = fetch_notion_data("https://api.notion.com/v1/search", self.headers, "POST", query_dict) return 
NotionSearchResponse(**data) - def _recursive_load(self) -> Generator[list[Document], None, None]: + def _recursive_load(self, start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[list[Document], None, None]: """Recursively load pages starting from root page ID.""" if self.root_page_id is None or not self.recursive_index_enabled: raise RuntimeError("Recursive page lookup is not enabled") - logging.info(f"Recursively loading pages from Notion based on root page with ID: {self.root_page_id}") + logging.info(f"[Notion]: Recursively loading pages from Notion based on root page with ID: {self.root_page_id}") pages = [self._fetch_page(page_id=self.root_page_id)] - yield from batch_generator(self._read_pages(pages), self.batch_size) + yield from batch_generator(self._read_pages(pages, start, end), self.batch_size) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: """Applies integration token to headers.""" - self.headers["Authorization"] = f'Bearer {credentials["notion_integration_token"]}' + self.headers["Authorization"] = f"Bearer {credentials['notion_integration_token']}" return None def load_from_state(self) -> GenerateDocumentsOutput: @@ -348,12 +522,10 @@ class NotionConnector(LoadConnector, PollConnector): else: break - def poll_source( - self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch - ) -> GenerateDocumentsOutput: + def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput: """Poll Notion for updated pages within a time period.""" if self.recursive_index_enabled and self.root_page_id: - yield from self._recursive_load() + yield from self._recursive_load(start, end) return query_dict = { @@ -367,7 +539,7 @@ class NotionConnector(LoadConnector, PollConnector): pages = filter_pages_by_time(db_res.results, start, end, "last_edited_time") if pages: - yield from batch_generator(self._read_pages(pages), self.batch_size) + yield 
from batch_generator(self._read_pages(pages, start, end), self.batch_size) if db_res.has_more: query_dict["start_cursor"] = db_res.next_cursor else: From 971c1bcba707013143e788c514261629a4c6a221 Mon Sep 17 00:00:00 2001 From: coding Date: Fri, 21 Nov 2025 09:33:36 +0800 Subject: [PATCH 03/12] Fix: missing parameters in by_plaintext method for PDF naive mode (#11408) ### What problem does this PR solve? FIx: missing parameters in by_plaintext method for PDF naive mode ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: lih --- rag/app/book.py | 1 + rag/app/laws.py | 1 + rag/app/manual.py | 1 + rag/app/one.py | 1 + rag/app/presentation.py | 1 + 5 files changed, 5 insertions(+) diff --git a/rag/app/book.py b/rag/app/book.py index 5ea28d40d..5bdaec72d 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -113,6 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/laws.py b/rag/app/laws.py index dd97e4e3a..ba2592833 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -172,6 +172,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/manual.py b/rag/app/manual.py index 124864041..b3a4ae38d 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -213,6 +213,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, parse_method = "manual", **kwargs ) diff --git a/rag/app/one.py b/rag/app/one.py index 5574aaa51..7cd1bb785 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -99,6 +99,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff 
--git a/rag/app/presentation.py b/rag/app/presentation.py index cd1d308ec..6a872528f 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -142,6 +142,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) From 653b78595834762b019bc8b29ad845bb5e7cdaa0 Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Fri, 21 Nov 2025 09:33:50 +0800 Subject: [PATCH 04/12] Fix: Modify the style of the user center #10703 (#11419) ### What problem does this PR solve? Fix: Modify the style of the user center ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/components/back-button/index.tsx | 5 +- web/src/components/confirm-delete-dialog.tsx | 3 +- web/src/components/dynamic-form.tsx | 20 +- web/src/components/edit-tag/index.tsx | 4 +- web/src/components/file-uploader.tsx | 12 +- .../components/originui/password-input.tsx | 14 - .../originui/select-with-search.tsx | 20 +- web/src/components/ui/command.tsx | 12 +- web/src/components/ui/input.tsx | 9 +- web/src/components/ui/modal/modal.tsx | 23 +- web/src/components/ui/multi-select.tsx | 8 +- web/src/components/ui/select.tsx | 4 +- web/src/components/ui/textarea.tsx | 2 +- web/src/components/ui/tooltip.tsx | 4 +- web/src/hooks/common-hooks.tsx | 30 +- web/src/locales/en.ts | 2 + web/src/locales/zh.ts | 2 + web/src/pages/dataset/sidebar/index.tsx | 2 +- .../data-source/add-datasource-modal.tsx | 1 + .../data-source/component/dynamic-form.tsx | 725 ------------------ .../user-setting/data-source/contant.tsx | 95 --- .../data-source-detail-page/index.tsx | 6 +- .../user-setting/mcp/edit-mcp-dialog.tsx | 2 +- web/src/pages/user-setting/mcp/index.tsx | 2 +- .../setting-model/components/un-add-model.tsx | 4 +- .../user-setting/setting-team/user-table.tsx | 4 +- web/src/pages/user-setting/sidebar/index.tsx | 4 +- 27 files changed, 122 insertions(+), 897 deletions(-) 
delete mode 100644 web/src/pages/user-setting/data-source/component/dynamic-form.tsx diff --git a/web/src/components/back-button/index.tsx b/web/src/components/back-button/index.tsx index c790d6882..118042128 100644 --- a/web/src/components/back-button/index.tsx +++ b/web/src/components/back-button/index.tsx @@ -29,7 +29,10 @@ const BackButton: React.FC = ({ return ( diff --git a/web/src/components/edit-tag/index.tsx b/web/src/components/edit-tag/index.tsx index 1921853d0..a05fadad6 100644 --- a/web/src/components/edit-tag/index.tsx +++ b/web/src/components/edit-tag/index.tsx @@ -102,8 +102,8 @@ const EditTag = React.forwardRef( {Array.isArray(tagChild) && tagChild.length > 0 && <>{tagChild}} {!inputVisible && ( ); diff --git a/web/src/components/originui/select-with-search.tsx b/web/src/components/originui/select-with-search.tsx index 2d095f157..e3cec6240 100644 --- a/web/src/components/originui/select-with-search.tsx +++ b/web/src/components/originui/select-with-search.tsx @@ -140,7 +140,7 @@ export const SelectWithSearch = forwardRef< ref={ref} disabled={disabled} className={cn( - '!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto', + '!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto group', triggerClassName, )} > @@ -155,12 +155,12 @@ export const SelectWithSearch = forwardRef< {value && allowClear && ( <> )} @@ -173,12 +173,17 @@ export const SelectWithSearch = forwardRef< - - - + + {options && options.length > 0 && ( + + )} + {t('common.noDataFound')} {options.map((group, idx) => { if (group.options) { @@ -209,6 +214,7 @@ export const SelectWithSearch = forwardRef< value={group.value} disabled={group.disabled} onSelect={handleSelect} + className="min-h-10" > {group.label} diff --git 
a/web/src/components/ui/command.tsx b/web/src/components/ui/command.tsx index 10caef77d..b96c82ba2 100644 --- a/web/src/components/ui/command.tsx +++ b/web/src/components/ui/command.tsx @@ -39,7 +39,10 @@ const CommandInput = React.forwardRef< React.ElementRef, React.ComponentPropsWithoutRef >(({ className, ...props }, ref) => ( -
+
e.stopPropagation()} onMouseEnter={(e) => e.currentTarget.focus()} tabIndex={-1} @@ -96,7 +102,7 @@ const CommandGroup = React.forwardRef< , 'prefix'> { @@ -50,6 +49,8 @@ const Input = React.forwardRef( 'pr-12': !!suffix || isPasswordInput, 'pr-24': !!suffix && isPasswordInput, }, + type === 'number' && + '[appearance:textfield] [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none', className, )} value={inputValue ?? ''} @@ -77,10 +78,10 @@ const Input = React.forwardRef( )} {isPasswordInput && ( - + )}
); diff --git a/web/src/components/ui/modal/modal.tsx b/web/src/components/ui/modal/modal.tsx index af516b1e6..c40e5f90b 100644 --- a/web/src/components/ui/modal/modal.tsx +++ b/web/src/components/ui/modal/modal.tsx @@ -27,7 +27,10 @@ export interface ModalProps { okText?: ReactNode | string; onOk?: () => void; onCancel?: () => void; + okButtonClassName?: string; + cancelButtonClassName?: string; disabled?: boolean; + style?: React.CSSProperties; } export interface ModalType extends FC { show: typeof modalIns.show; @@ -56,7 +59,10 @@ const Modal: ModalType = ({ confirmLoading, cancelText, okText, + okButtonClassName, + cancelButtonClassName, disabled = false, + style, }) => { const sizeClasses = { small: 'max-w-md', @@ -111,7 +117,10 @@ const Modal: ModalType = ({ @@ -122,6 +131,7 @@ const Modal: ModalType = ({ className={cn( 'px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90', { 'cursor-not-allowed': disabled }, + okButtonClassName, )} > {confirmLoading && ( @@ -153,23 +163,26 @@ const Modal: ModalType = ({ handleOk, showfooter, footerClassName, + okButtonClassName, + cancelButtonClassName, ]); return ( maskClosable && onOpenChange?.(false)} > e.stopPropagation()} > {/* title */} {(title || closable) && (
+ + ); + })} +
+ + ); +} diff --git a/web/src/pages/agent/form/iteration-form/index.tsx b/web/src/pages/agent/form/iteration-form/index.tsx index 6132b7f5e..c528fa720 100644 --- a/web/src/pages/agent/form/iteration-form/index.tsx +++ b/web/src/pages/agent/form/iteration-form/index.tsx @@ -1,4 +1,3 @@ -import { FormContainer } from '@/components/form-container'; import { Form } from '@/components/ui/form'; import { zodResolver } from '@hookform/resolvers/zod'; import { memo, useMemo } from 'react'; @@ -10,12 +9,21 @@ import { FormWrapper } from '../components/form-wrapper'; import { Output } from '../components/output'; import { QueryVariable } from '../components/query-variable'; import { DynamicOutput } from './dynamic-output'; +import { DynamicVariables } from './dynamic-variables'; import { OutputArray } from './interface'; import { useValues } from './use-values'; import { useWatchFormChange } from './use-watch-form-change'; const FormSchema = z.object({ query: z.string().optional(), + variables: z.array( + z.object({ + variable: z.string().optional(), + operator: z.string().optional(), + parameter: z.string().or(z.number()).or(z.boolean()).optional(), + mode: z.string(), + }), + ), outputs: z.array(z.object({ name: z.string(), value: z.any() })).optional(), }); @@ -41,12 +49,11 @@ function IterationForm({ node }: INextOperatorForm) { return (
- - - + + diff --git a/web/src/pages/agent/form/iteration-form/use-build-logical-options.ts b/web/src/pages/agent/form/iteration-form/use-build-logical-options.ts new file mode 100644 index 000000000..a7f960e98 --- /dev/null +++ b/web/src/pages/agent/form/iteration-form/use-build-logical-options.ts @@ -0,0 +1,59 @@ +import { buildOptions } from '@/utils/form'; +import { camelCase } from 'lodash'; +import { useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; +import { + JsonSchemaDataType, + VariableAssignerLogicalArrayOperator, + VariableAssignerLogicalNumberOperator, + VariableAssignerLogicalNumberOperatorLabelMap, + VariableAssignerLogicalOperator, +} from '../../constant'; + +export function useBuildLogicalOptions() { + const { t } = useTranslation(); + + const buildVariableAssignerLogicalOptions = useCallback( + (record: Record) => { + return buildOptions( + record, + t, + 'flow.variableAssignerLogicalOperatorOptions', + true, + ); + }, + [t], + ); + + const buildLogicalOptions = useCallback( + (type: string) => { + if ( + type?.toLowerCase().startsWith(JsonSchemaDataType.Array.toLowerCase()) + ) { + return buildVariableAssignerLogicalOptions( + VariableAssignerLogicalArrayOperator, + ); + } + + if (type === JsonSchemaDataType.Number) { + return Object.values(VariableAssignerLogicalNumberOperator).map( + (val) => ({ + label: t( + `flow.variableAssignerLogicalOperatorOptions.${camelCase(VariableAssignerLogicalNumberOperatorLabelMap[val as keyof typeof VariableAssignerLogicalNumberOperatorLabelMap] || val)}`, + ), + value: val, + }), + ); + } + + return buildVariableAssignerLogicalOptions( + VariableAssignerLogicalOperator, + ); + }, + [buildVariableAssignerLogicalOptions, t], + ); + + return { + buildLogicalOptions, + }; +} diff --git a/web/src/pages/agent/gobal-variable-sheet/constant.ts b/web/src/pages/agent/gobal-variable-sheet/constant.ts index 72a7d6342..935540c15 100644 --- a/web/src/pages/agent/gobal-variable-sheet/constant.ts +++ 
b/web/src/pages/agent/gobal-variable-sheet/constant.ts @@ -1,7 +1,7 @@ import { FormFieldConfig, FormFieldType } from '@/components/dynamic-form'; -import { buildSelectOptions } from '@/utils/component-util'; import { t } from 'i18next'; import { TypesWithArray } from '../constant'; +import { buildConversationVariableSelectOptions } from '../utils'; export { TypesWithArray } from '../constant'; // const TypesWithoutArray = Object.values(JsonSchemaDataType).filter( // (item) => item !== JsonSchemaDataType.Array, @@ -29,7 +29,7 @@ export const GlobalFormFields = [ placeholder: '', required: true, type: FormFieldType.Select, - options: buildSelectOptions(Object.values(TypesWithArray)), + options: buildConversationVariableSelectOptions(), }, { label: t('flow.defaultValue'), diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts index f4e4a4b1d..6ae2935b4 100644 --- a/web/src/pages/agent/utils.ts +++ b/web/src/pages/agent/utils.ts @@ -7,6 +7,7 @@ import { ICategorizeItemResult, } from '@/interfaces/database/agent'; import { DSLComponents, RAGFlowNodeType } from '@/interfaces/database/flow'; +import { buildSelectOptions } from '@/utils/component-util'; import { removeUselessFieldsFromValues } from '@/utils/form'; import { Edge, Node, XYPosition } from '@xyflow/react'; import { FormInstance, FormListFieldData } from 'antd'; @@ -30,6 +31,7 @@ import { NoDebugOperatorsList, NodeHandleId, Operator, + TypesWithArray, } from './constant'; import { DataOperationsFormSchemaType } from './form/data-operations-form'; import { ExtractorFormSchemaType } from './form/extractor-form'; @@ -766,3 +768,7 @@ export function buildBeginQueryWithObject( export function getArrayElementType(type: string) { return typeof type === 'string' ? type.match(/<([^>]+)>/)?.at(1) ?? 
'' : ''; } + +export function buildConversationVariableSelectOptions() { + return buildSelectOptions(Object.values(TypesWithArray)); +} From 1845daf41ff10ac510bb8815cdf8dd7cd77f958a Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Fri, 21 Nov 2025 14:32:50 +0800 Subject: [PATCH 07/12] Fix: UI adjustments, replacing private components with public components (#11438) ### What problem does this PR solve? Fix: UI adjustments, replacing private components with public components - UI adjustments for public components (input, multiselect, SliderInputFormField) - Replacing the private LlmSettingFieldItems component in search with the public LlmSettingFieldItems component ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/components/llm-setting-items/next.tsx | 170 +++++++++---- .../components/llm-setting-items/slider.tsx | 7 +- web/src/components/originui/input.tsx | 6 +- .../raptor-form-fields.tsx | 10 +- .../components/similarity-slider/index.tsx | 6 +- .../components/slider-input-form-field.tsx | 5 +- web/src/components/ui/command.tsx | 2 +- web/src/components/ui/divider.tsx | 2 +- web/src/components/ui/input.tsx | 82 ++++-- web/src/components/ui/multi-select.tsx | 42 ++-- .../search-setting-aisummery-config.tsx | 236 ------------------ web/src/pages/next-search/search-setting.tsx | 25 +- 12 files changed, 252 insertions(+), 341 deletions(-) delete mode 100644 web/src/pages/next-search/search-setting-aisummery-config.tsx diff --git a/web/src/components/llm-setting-items/next.tsx b/web/src/components/llm-setting-items/next.tsx index 8d21bf9a8..3ab762201 100644 --- a/web/src/components/llm-setting-items/next.tsx +++ b/web/src/components/llm-setting-items/next.tsx @@ -1,6 +1,9 @@ -import { ModelVariableType } from '@/constants/knowledge'; +import { + ModelVariableType, + settledModelVariableMap, +} from '@/constants/knowledge'; import { useTranslate } from '@/hooks/common-hooks'; -import { camelCase } from 'lodash'; 
+import { camelCase, isEqual } from 'lodash'; import { useCallback } from 'react'; import { useFormContext } from 'react-hook-form'; import { z } from 'zod'; @@ -25,6 +28,13 @@ import { useHandleFreedomChange } from './use-watch-change'; interface LlmSettingFieldItemsProps { prefix?: string; options?: any[]; + showFields?: Array< + | 'temperature' + | 'top_p' + | 'presence_penalty' + | 'frequency_penalty' + | 'max_tokens' + >; } export const LLMIdFormField = { @@ -56,6 +66,13 @@ export const LlmSettingSchema = { export function LlmSettingFieldItems({ prefix, options, + showFields = [ + 'temperature', + 'top_p', + 'presence_penalty', + 'frequency_penalty', + 'max_tokens', + ], }: LlmSettingFieldItemsProps) { const form = useFormContext(); const { t } = useTranslate('chat'); @@ -72,14 +89,53 @@ export function LlmSettingFieldItems({ const parameterOptions = Object.values(ModelVariableType).map((x) => ({ label: t(camelCase(x)), value: x, - })); + })) as { label: string; value: ModelVariableType | 'Custom' }[]; + + parameterOptions.push({ + label: t(camelCase('Custom')), + value: 'Custom', + }); + const checkParameterIsEqual = () => { + const [ + parameter, + topPValue, + frequencyPenaltyValue, + temperatureValue, + presencePenaltyValue, + maxTokensValue, + ] = form.getValues([ + getFieldWithPrefix('parameter'), + getFieldWithPrefix('temperature'), + getFieldWithPrefix('top_p'), + getFieldWithPrefix('frequency_penalty'), + getFieldWithPrefix('presence_penalty'), + getFieldWithPrefix('max_tokens'), + ]); + if (parameter && parameter !== 'Custom') { + const parameterValue = + settledModelVariableMap[parameter as keyof typeof ModelVariableType]; + const parameterRealValue = { + top_p: topPValue, + temperature: temperatureValue, + frequency_penalty: frequencyPenaltyValue, + presence_penalty: presencePenaltyValue, + max_tokens: maxTokensValue, + }; + if (!isEqual(parameterValue, parameterRealValue)) { + form.setValue(getFieldWithPrefix('parameter'), 'Custom'); + } + } + }; 
return (
- + ( {t('freedom')} @@ -107,45 +163,71 @@ export function LlmSettingFieldItems({ )} /> - - - - - + {showFields.some((item) => item === 'temperature') && ( + { + checkParameterIsEqual(); + }} + > + )} + {showFields.some((item) => item === 'top_p') && ( + { + checkParameterIsEqual(); + }} + > + )} + {showFields.some((item) => item === 'presence_penalty') && ( + { + checkParameterIsEqual(); + }} + > + )} + {showFields.some((item) => item === 'frequency_penalty') && ( + { + checkParameterIsEqual(); + }} + > + )} + {showFields.some((item) => item === 'max_tokens') && ( + { + checkParameterIsEqual(); + }} + > + )}
); } diff --git a/web/src/components/llm-setting-items/slider.tsx b/web/src/components/llm-setting-items/slider.tsx index 482b80578..5c52d29e8 100644 --- a/web/src/components/llm-setting-items/slider.tsx +++ b/web/src/components/llm-setting-items/slider.tsx @@ -22,6 +22,7 @@ type SliderInputSwitchFormFieldProps = { onChange?: (value: number) => void; className?: string; checkName: string; + numberInputClassName?: string; }; export function SliderInputSwitchFormField({ @@ -34,6 +35,7 @@ export function SliderInputSwitchFormField({ onChange, className, checkName, + numberInputClassName, }: SliderInputSwitchFormFieldProps) { const form = useFormContext(); const disabled = !form.watch(checkName); @@ -81,7 +83,10 @@ export function SliderInputSwitchFormField({ +
+ +
} />
diff --git a/web/src/components/similarity-slider/index.tsx b/web/src/components/similarity-slider/index.tsx index 05a7f29ce..73666b504 100644 --- a/web/src/components/similarity-slider/index.tsx +++ b/web/src/components/similarity-slider/index.tsx @@ -59,6 +59,7 @@ interface SimilaritySliderFormFieldProps { similarityName?: string; vectorSimilarityWeightName?: string; isTooltipShown?: boolean; + numberInputClassName?: string; } export const initialSimilarityThresholdValue = { @@ -86,6 +87,7 @@ export function SimilaritySliderFormField({ similarityName = 'similarity_threshold', vectorSimilarityWeightName = 'vector_similarity_weight', isTooltipShown, + numberInputClassName, }: SimilaritySliderFormFieldProps) { const { t } = useTranslate('knowledgeDetails'); const form = useFormContext(); @@ -101,6 +103,7 @@ export function SimilaritySliderFormField({ step={0.01} layout={FormLayout.Vertical} tooltip={isTooltipShown && t('similarityThresholdTip')} + numberInputClassName={numberInputClassName} > -
+
@@ -158,6 +161,7 @@ export function SimilaritySliderFormField({ className={cn( 'h-6 w-10 p-0 text-center bg-bg-input border-border-default border text-text-secondary', '[appearance:textfield] [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none', + numberInputClassName, )} max={1} min={0} diff --git a/web/src/components/slider-input-form-field.tsx b/web/src/components/slider-input-form-field.tsx index 40b2c6804..0344c4eeb 100644 --- a/web/src/components/slider-input-form-field.tsx +++ b/web/src/components/slider-input-form-field.tsx @@ -25,6 +25,7 @@ type SliderInputFormFieldProps = { tooltip?: ReactNode; defaultValue?: number; className?: string; + numberInputClassName?: string; } & FormLayoutType; export function SliderInputFormField({ @@ -36,6 +37,7 @@ export function SliderInputFormField({ tooltip, defaultValue, className, + numberInputClassName, layout = FormLayout.Horizontal, }: SliderInputFormFieldProps) { const form = useFormContext(); @@ -61,7 +63,7 @@ export function SliderInputFormField({
= ({ direction = 'horizontal', type = 'horizontal', text, - color = 'border-muted-foreground/50', + color = 'border-border-button', margin = 'my-4', className = '', }) => { diff --git a/web/src/components/ui/input.tsx b/web/src/components/ui/input.tsx index 7eb1c0fe4..c8f0044bf 100644 --- a/web/src/components/ui/input.tsx +++ b/web/src/components/ui/input.tsx @@ -2,7 +2,7 @@ import * as React from 'react'; import { cn } from '@/lib/utils'; import { Eye, EyeOff, Search } from 'lucide-react'; -import { useState } from 'react'; +import { useEffect, useMemo, useRef, useState } from 'react'; export interface InputProps extends Omit, 'prefix'> { @@ -17,6 +17,20 @@ const Input = React.forwardRef( const { defaultValue, ...restProps } = props; const inputValue = isControlled ? value : defaultValue; const [showPassword, setShowPassword] = useState(false); + const [prefixWidth, setPrefixWidth] = useState(0); + const [suffixWidth, setSuffixWidth] = useState(0); + + const prefixRef = useRef(null); + const suffixRef = useRef(null); + + useEffect(() => { + if (prefixRef.current) { + setPrefixWidth(prefixRef.current.offsetWidth); + } + if (suffixRef.current) { + setSuffixWidth(suffixRef.current.offsetWidth); + } + }, [prefix, suffix, prefixRef, suffixRef]); const handleChange: React.ChangeEventHandler = (e) => { if (type === 'number') { const numValue = e.target.value === '' ? '' : Number(e.target.value); @@ -34,42 +48,60 @@ const Input = React.forwardRef( const isPasswordInput = type === 'password'; - const inputEl = ( - + const inputEl = useMemo( + () => ( + + ), + [ + prefixWidth, + suffixWidth, + isPasswordInput, + inputValue, + className, + handleChange, + restProps, + ], ); if (prefix || suffix || isPasswordInput) { return (
{prefix && ( - + {prefix} )} {inputEl} {suffix && ( {selectedValues.length > 0 ? ( -
+
{selectedValues?.slice(0, maxCount)?.map((value) => { const option = flatOptions.find((o) => o.value === value); @@ -348,9 +348,9 @@ export const MultiSelect = React.forwardRef< )}
-
+
{ event.stopPropagation(); handleClear(); @@ -358,17 +358,17 @@ export const MultiSelect = React.forwardRef< /> - +
) : (
- + {placeholder} - +
)} @@ -379,14 +379,16 @@ export const MultiSelect = React.forwardRef< onEscapeKeyDown={() => setIsPopoverOpen(false)} > - + {options && options.length > 0 && ( + + )} No results found. - {showSelectAll && ( + {showSelectAll && options && options.length > 0 && ( )} - setIsPopoverOpen(false)} - className="flex-1 justify-center cursor-pointer max-w-full" - > - {t('common.close')} - + {options && options.length > 0 && ( + setIsPopoverOpen(false)} + className="flex-1 justify-center cursor-pointer max-w-full" + > + {t('common.close')} + + )}
diff --git a/web/src/pages/next-search/search-setting-aisummery-config.tsx b/web/src/pages/next-search/search-setting-aisummery-config.tsx deleted file mode 100644 index 8764c3014..000000000 --- a/web/src/pages/next-search/search-setting-aisummery-config.tsx +++ /dev/null @@ -1,236 +0,0 @@ -import { SliderInputSwitchFormField } from '@/components/llm-setting-items/slider'; -import { SelectWithSearch } from '@/components/originui/select-with-search'; -import { - FormControl, - FormField, - FormItem, - FormLabel, - FormMessage, -} from '@/components/ui/form'; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from '@/components/ui/select'; -import { - LlmModelType, - ModelVariableType, - settledModelVariableMap, -} from '@/constants/knowledge'; -import { useTranslate } from '@/hooks/common-hooks'; -import { useComposeLlmOptionsByModelTypes } from '@/hooks/llm-hooks'; -import { camelCase, isEqual } from 'lodash'; -import { useCallback } from 'react'; -import { useFormContext } from 'react-hook-form'; -import { z } from 'zod'; - -interface LlmSettingFieldItemsProps { - prefix?: string; - options?: any[]; -} -const LlmSettingEnableSchema = { - temperatureEnabled: z.boolean(), - topPEnabled: z.boolean(), - presencePenaltyEnabled: z.boolean(), - frequencyPenaltyEnabled: z.boolean(), -}; -export const LlmSettingSchema = { - llm_id: z.string(), - parameter: z.string().optional(), - temperature: z.coerce.number().optional(), - top_p: z.coerce.number().optional(), - presence_penalty: z.coerce.number().optional(), - frequency_penalty: z.coerce.number().optional(), - ...LlmSettingEnableSchema, - // maxTokensEnabled: z.boolean(), -}; - -export function LlmSettingFieldItems({ - prefix, - options, -}: LlmSettingFieldItemsProps) { - const form = useFormContext(); - const { t } = useTranslate('chat'); - - const modelOptions = useComposeLlmOptionsByModelTypes([ - LlmModelType.Chat, - LlmModelType.Image2text, - ]); - - const handleChange = 
useCallback( - (parameter: string) => { - const values = - settledModelVariableMap[ - parameter as keyof typeof settledModelVariableMap - ]; - const enabledKeys = Object.keys(LlmSettingEnableSchema); - - for (const key in values) { - if (Object.prototype.hasOwnProperty.call(values, key)) { - const element = values[key as keyof typeof values]; - form.setValue(`${prefix}.${key}`, element); - } - } - if (enabledKeys && enabledKeys.length) { - for (const key of enabledKeys) { - form.setValue(`${prefix}.${key}`, true); - } - } - }, - [form, prefix], - ); - - const parameterOptions = Object.values(ModelVariableType).map((x) => ({ - label: t(camelCase(x)), - value: x, - })) as unknown as { label: string; value: ModelVariableType | 'Custom' }[]; - parameterOptions.push({ - label: t(camelCase('Custom')), - value: 'Custom', - }); - - const getFieldWithPrefix = useCallback( - (name: string) => { - return prefix ? `${prefix}.${name}` : name; - }, - [prefix], - ); - - const checkParameterIsEquel = () => { - const [ - parameter, - topPValue, - frequencyPenaltyValue, - temperatureValue, - presencePenaltyValue, - ] = form.getValues([ - getFieldWithPrefix('parameter'), - getFieldWithPrefix('temperature'), - getFieldWithPrefix('top_p'), - getFieldWithPrefix('frequency_penalty'), - getFieldWithPrefix('presence_penalty'), - ]); - if (parameter && parameter !== 'Custom') { - const parameterValue = - settledModelVariableMap[parameter as keyof typeof ModelVariableType]; - const parameterRealValue = { - top_p: topPValue, - temperature: temperatureValue, - frequency_penalty: frequencyPenaltyValue, - presence_penalty: presencePenaltyValue, - }; - if (!isEqual(parameterValue, parameterRealValue)) { - form.setValue(getFieldWithPrefix('parameter'), 'Custom'); - } - } - }; - - return ( -
- ( - - - * - {t('model')} - - - - - - - )} - /> - ( - - {t('freedom')} - -
- -
-
- -
- )} - /> - { - checkParameterIsEquel(); - }} - > - { - checkParameterIsEquel(); - }} - > - { - checkParameterIsEquel(); - }} - > - { - checkParameterIsEquel(); - }} - > - {/* */} -
- ); -} diff --git a/web/src/pages/next-search/search-setting.tsx b/web/src/pages/next-search/search-setting.tsx index 665d7e49b..365c366cb 100644 --- a/web/src/pages/next-search/search-setting.tsx +++ b/web/src/pages/next-search/search-setting.tsx @@ -1,6 +1,10 @@ // src/pages/next-search/search-setting.tsx import { AvatarUpload } from '@/components/avatar-upload'; +import { + LlmSettingFieldItems, + LlmSettingSchema, +} from '@/components/llm-setting-items/next'; import { MetadataFilter, MetadataFilterSchema, @@ -46,10 +50,10 @@ import { IllmSettingProps, useUpdateSearch, } from '../next-searches/hooks'; -import { - LlmSettingFieldItems, - LlmSettingSchema, -} from './search-setting-aisummery-config'; +// import { +// LlmSettingFieldItems, +// LlmSettingSchema, +// } from './search-setting-aisummery-config'; interface SearchSettingProps { open: boolean; @@ -397,6 +401,7 @@ const SearchSetting: React.FC = ({ isTooltipShown similarityName="search_config.similarity_threshold" vectorSimilarityWeightName="search_config.vector_similarity_weight" + numberInputClassName="rounded-sm" > {/* Rerank Model */} = ({ = ({ )} /> {aiSummaryDisabled && ( + // )} {/* Feature Controls */} From 1033a3ae268367dbd2715ddc522ff6ee9a05064c Mon Sep 17 00:00:00 2001 From: FallingSnowFlake Date: Fri, 21 Nov 2025 14:33:29 +0800 Subject: [PATCH 08/12] Fix: improve PDF text type detection by expanding regex content (#11432) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add whitespace validation to the PDF English text checking regex - Reduce false negatives in English PDF content recognition ### What problem does this PR solve? The core idea is to **expand the regex content used for English text detection** so it can accommodate more valid characters commonly found in English PDFs. The modifications include: - Adding support for **space** in the regex. - Ensuring the update does not reduce existing detection accuracy. 
### Type of change - [✅] Bug Fix (non-breaking change which fixes an issue) --- deepdoc/parser/pdf_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 6d8431c82..f6613c2f5 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -1091,7 +1091,7 @@ class RAGFlowPdfParser: logging.debug("Images converted.") self.is_english = [ - re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) + re.search(r"[ a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in range(len(self.page_chars)) ] if sum([1 if e else 0 for e in self.is_english]) > len(self.page_images) / 2: @@ -1148,7 +1148,7 @@ class RAGFlowPdfParser: if not self.is_english and not any([c for c in self.page_chars]) and self.boxes: bxes = [b for bxs in self.boxes for b in bxs] - self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))])) + self.is_english = re.search(r"[ \na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))])) logging.debug(f"Is it English: {self.is_english}") From db0f6840d988db04192d9c05139f21c5afa26c28 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Fri, 21 Nov 2025 14:36:26 +0800 Subject: [PATCH 09/12] Feat: ignore chunk size when using custom delimiters (#11434) ### What problem does this PR solve? Ignore chunk size when using custom delimiter. 
### Type of change - [x] New Feature (non-breaking change which adds functionality) --- rag/nlp/__init__.py | 76 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index f61019377..add454ade 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -437,16 +437,16 @@ def not_title(txt): return re.search(r"[,;,。;!!]", txt) def tree_merge(bull, sections, depth): - + if not sections or bull < 0: return sections if isinstance(sections[0], type("")): sections = [(s, "") for s in sections] - + # filter out position information in pdf sections sections = [(t, o) for t, o in sections if t and len(t.split("@")[0].strip()) > 1 and not re.match(r"[0-9]+$", t.split("@")[0].strip())] - + def get_level(bull, section): text, layout = section text = re.sub(r"\u3000", " ", text).strip() @@ -465,7 +465,7 @@ def tree_merge(bull, sections, depth): level, text = get_level(bull, section) if not text.strip("\n"): continue - + lines.append((level, text)) level_set.add(level) @@ -608,6 +608,26 @@ def naive_merge(sections: str | list, chunk_token_num=128, delimiter="\n。; cks[-1] += t tk_nums[-1] += tnum + custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)] + has_custom = bool(custom_delimiters) + if has_custom: + custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True)) + cks, tk_nums = [], [] + for sec, pos in sections: + split_sec = re.split(r"(%s)" % custom_pattern, sec, flags=re.DOTALL) + for sub_sec in split_sec: + if re.fullmatch(custom_pattern, sub_sec or ""): + continue + text = "\n" + sub_sec + local_pos = pos + if num_tokens_from_string(text) < 8: + local_pos = "" + if local_pos and text.find(local_pos) < 0: + text += local_pos + cks.append(text) + tk_nums.append(num_tokens_from_string(text)) + return cks + dels = get_delimiters(delimiter) for sec, pos in sections: if num_tokens_from_string(sec) < 
chunk_token_num: @@ -657,6 +677,29 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。 result_images[-1] = concat_img(result_images[-1], image) tk_nums[-1] += tnum + custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)] + has_custom = bool(custom_delimiters) + if has_custom: + custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True)) + cks, result_images, tk_nums = [], [], [] + for text, image in zip(texts, images): + text_str = text[0] if isinstance(text, tuple) else text + text_pos = text[1] if isinstance(text, tuple) and len(text) > 1 else "" + split_sec = re.split(r"(%s)" % custom_pattern, text_str) + for sub_sec in split_sec: + if re.fullmatch(custom_pattern, sub_sec or ""): + continue + text_seg = "\n" + sub_sec + local_pos = text_pos + if num_tokens_from_string(text_seg) < 8: + local_pos = "" + if local_pos and text_seg.find(local_pos) < 0: + text_seg += local_pos + cks.append(text_seg) + result_images.append(image) + tk_nums.append(num_tokens_from_string(text_seg)) + return cks, result_images + dels = get_delimiters(delimiter) for text, image in zip(texts, images): # if text is tuple, unpack it @@ -748,6 +791,23 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。;!?"): images[-1] = concat_img(images[-1], image) tk_nums[-1] += tnum + custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)] + has_custom = bool(custom_delimiters) + if has_custom: + custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True)) + cks, images, tk_nums = [], [], [] + pattern = r"(%s)" % custom_pattern + for sec, image in sections: + split_sec = re.split(pattern, sec) + for sub_sec in split_sec: + if not sub_sec or re.fullmatch(custom_pattern, sub_sec): + continue + text_seg = "\n" + sub_sec + cks.append(text_seg) + images.append(image) + tk_nums.append(num_tokens_from_string(text_seg)) + return cks, 
images + dels = get_delimiters(delimiter) pattern = r"(%s)" % dels @@ -789,7 +849,7 @@ class Node: self.level = level self.depth = depth self.texts = texts or [] - self.children = [] + self.children = [] def add_child(self, child_node): self.children.append(child_node) @@ -835,7 +895,7 @@ class Node: return self def get_tree(self): - tree_list = [] + tree_list = [] self._dfs(self, tree_list, []) return tree_list @@ -860,7 +920,7 @@ class Node: # A leaf title within depth emits its title path as a chunk (header-only section) elif not child and (1 <= level <= self.depth): tree_list.append("\n".join(path_titles)) - + # Recurse into children with the updated title path for c in child: - self._dfs(c, tree_list, path_titles) \ No newline at end of file + self._dfs(c, tree_list, path_titles) From 249296e417e75d50053f4a49402b7b6a6eab2502 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Fri, 21 Nov 2025 14:51:58 +0800 Subject: [PATCH 10/12] Feat: API supports toc_enhance. (#11437) ### What problem does this PR solve? Close #11433 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- README.md | 2 +- README_id.md | 2 +- README_ja.md | 2 +- README_ko.md | 2 +- README_pt_br.md | 2 +- README_tzh.md | 2 +- README_zh.md | 2 +- agent/component/iteration.py | 2 +- api/apps/__init__.py | 6 +++++- api/apps/sdk/doc.py | 6 ++++++ docs/references/http_api_reference.md | 6 ++++++ 11 files changed, 25 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index d82721d98..ded81f099 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io). ## 🔥 Latest Updates - 2025-11-19 Supports Gemini 3 Pro. -- 2025-11-12 Supports data synchronization from Confluence, AWS S3, Discord, Google Drive. +- 2025-11-12 Supports data synchronization from Confluence, S3, Notion, Discord, Google Drive. - 2025-10-23 Supports MinerU & Docling as document parsing methods. 
- 2025-10-15 Supports orchestrable ingestion pipeline. - 2025-08-08 Supports OpenAI's latest GPT-5 series models. diff --git a/README_id.md b/README_id.md index 953fce4c5..11b09b4fb 100644 --- a/README_id.md +++ b/README_id.md @@ -86,7 +86,7 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io). ## 🔥 Pembaruan Terbaru - 2025-11-19 Mendukung Gemini 3 Pro. -- 2025-11-12 Mendukung sinkronisasi data dari Confluence, AWS S3, Discord, Google Drive. +- 2025-11-12 Mendukung sinkronisasi data dari Confluence, S3, Notion, Discord, Google Drive. - 2025-10-23 Mendukung MinerU & Docling sebagai metode penguraian dokumen. - 2025-10-15 Dukungan untuk jalur data yang terorkestrasi. - 2025-08-08 Mendukung model seri GPT-5 terbaru dari OpenAI. diff --git a/README_ja.md b/README_ja.md index 7711d3ff0..5e471b5c2 100644 --- a/README_ja.md +++ b/README_ja.md @@ -67,7 +67,7 @@ ## 🔥 最新情報 - 2025-11-19 Gemini 3 Proをサポートしています -- 2025-11-12 Confluence、AWS S3、Discord、Google Drive からのデータ同期をサポートします。 +- 2025-11-12 Confluence、S3、Notion、Discord、Google Drive からのデータ同期をサポートします。 - 2025-10-23 ドキュメント解析方法として MinerU と Docling をサポートします。 - 2025-10-15 オーケストレーションされたデータパイプラインのサポート。 - 2025-08-08 OpenAI の最新 GPT-5 シリーズモデルをサポートします。 diff --git a/README_ko.md b/README_ko.md index 386fd2faa..f34f23279 100644 --- a/README_ko.md +++ b/README_ko.md @@ -68,7 +68,7 @@ ## 🔥 업데이트 - 2025-11-19 Gemini 3 Pro를 지원합니다. -- 2025-11-12 Confluence, AWS S3, Discord, Google Drive에서 데이터 동기화를 지원합니다. +- 2025-11-12 Confluence, S3, Notion, Discord, Google Drive에서 데이터 동기화를 지원합니다. - 2025-10-23 문서 파싱 방법으로 MinerU 및 Docling을 지원합니다. - 2025-10-15 조정된 데이터 파이프라인 지원. - 2025-08-08 OpenAI의 최신 GPT-5 시리즈 모델을 지원합니다. diff --git a/README_pt_br.md b/README_pt_br.md index 487ec5530..71690ebb9 100644 --- a/README_pt_br.md +++ b/README_pt_br.md @@ -87,7 +87,7 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io). ## 🔥 Últimas Atualizações - 19-11-2025 Suporta Gemini 3 Pro. 
-- 12-11-2025 Suporta a sincronização de dados do Confluence, AWS S3, Discord e Google Drive. +- 12-11-2025 Suporta a sincronização de dados do Confluence, S3, Notion, Discord e Google Drive. - 23-10-2025 Suporta MinerU e Docling como métodos de análise de documentos. - 15-10-2025 Suporte para pipelines de dados orquestrados. - 08-08-2025 Suporta a mais recente série GPT-5 da OpenAI. diff --git a/README_tzh.md b/README_tzh.md index eab5938e4..7756aacc8 100644 --- a/README_tzh.md +++ b/README_tzh.md @@ -86,7 +86,7 @@ ## 🔥 近期更新 - 2025-11-19 支援 Gemini 3 Pro. -- 2025-11-12 支援從 Confluence、AWS S3、Discord、Google Drive 進行資料同步。 +- 2025-11-12 支援從 Confluence、S3、Notion、Discord、Google Drive 進行資料同步。 - 2025-10-23 支援 MinerU 和 Docling 作為文件解析方法。 - 2025-10-15 支援可編排的資料管道。 - 2025-08-08 支援 OpenAI 最新的 GPT-5 系列模型。 diff --git a/README_zh.md b/README_zh.md index 58394b5fd..799c3aaea 100644 --- a/README_zh.md +++ b/README_zh.md @@ -86,7 +86,7 @@ ## 🔥 近期更新 - 2025-11-19 支持 Gemini 3 Pro. -- 2025-11-12 支持从 Confluence、AWS S3、Discord、Google Drive 进行数据同步。 +- 2025-11-12 支持从 Confluence、S3、Notion、Discord、Google Drive 进行数据同步。 - 2025-10-23 支持 MinerU 和 Docling 作为文档解析方法。 - 2025-10-15 支持可编排的数据管道。 - 2025-08-08 支持 OpenAI 最新的 GPT-5 系列模型。 diff --git a/agent/component/iteration.py b/agent/component/iteration.py index cff09d622..ae5c0b677 100644 --- a/agent/component/iteration.py +++ b/agent/component/iteration.py @@ -32,7 +32,7 @@ class IterationParam(ComponentParamBase): def __init__(self): super().__init__() self.items_ref = "" - self.veriable={} + self.variable={} def get_input_form(self) -> dict[str, dict]: return { diff --git a/api/apps/__init__.py b/api/apps/__init__.py index a53f67c06..a6e33c13b 100644 --- a/api/apps/__init__.py +++ b/api/apps/__init__.py @@ -24,7 +24,7 @@ from flasgger import Swagger from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer from quart_cors import cors from common.constants import StatusEnum -from api.db.db_models import close_connection +from 
api.db.db_models import close_connection, APIToken from api.db.services import UserService from api.utils.json_encode import CustomJSONEncoder from api.utils import commands @@ -124,6 +124,10 @@ def _load_user(): user = UserService.query( access_token=access_token, status=StatusEnum.VALID.value ) + if not user and len(authorization.split()) == 2: + objs = APIToken.query(token=authorization.split()[1]) + if objs: + user = UserService.query(id=objs[0].tenant_id, status=StatusEnum.VALID.value) if user: if not user[0].access_token or not user[0].access_token.strip(): logging.warning(f"User {user[0].email} has empty access_token in database") diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 30fbd835e..52acebc43 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1434,6 +1434,7 @@ async def retrieval_test(tenant_id): question = req["question"] doc_ids = req.get("document_ids", []) use_kg = req.get("use_kg", False) + toc_enhance = req.get("toc_enhance", False) langs = req.get("cross_languages", []) if not isinstance(doc_ids, list): return get_error_data_result("`documents` should be a list") @@ -1487,6 +1488,11 @@ async def retrieval_test(tenant_id): highlight=highlight, rank_feature=label_question(question, kbs), ) + if toc_enhance: + chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT) + cks = settings.retriever.retrieval_by_toc(question, ranks["chunks"], tenant_ids, chat_mdl, size) + if cks: + ranks["chunks"] = cks if use_kg: ck = settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT)) if ck["content_with_weight"]: diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index bc1b15670..253745432 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -2072,6 +2072,7 @@ Retrieves chunks from specified datasets. 
- `"cross_languages"`: `list[string]` - `"metadata_condition"`: `object` - `"use_kg"`: `boolean` + - `"toc_enhance"`: `boolean` ##### Request example ```bash @@ -2122,6 +2123,8 @@ curl --request POST \ The number of chunks engaged in vector cosine computation. Defaults to `1024`. - `"use_kg"`: (*Body parameter*), `boolean` The search includes text chunks related to the knowledge graph of the selected dataset to handle complex multi-hop queries. Defaults to `False`. +- `"toc_enhance"`: (*Body parameter*), `boolean` + The search applies table-of-contents enhancement to boost the rank of relevant chunks. Requires files that were parsed with `TOC Enhance` enabled. Defaults to `False`. - `"rerank_id"`: (*Body parameter*), `integer` The ID of the rerank model. - `"keyword"`: (*Body parameter*), `boolean` @@ -2136,6 +2139,9 @@ curl --request POST \ The languages that should be translated into, in order to achieve keywords retrievals in different languages. - `"metadata_condition"`: (*Body parameter*), `object` The metadata condition used for filtering chunks: + - `"logic"`: (*Body parameter*), `string` + - `"and"` Intersection of the results from each condition (default). + - `"or"` Union of the results from each condition. - `"conditions"`: (*Body parameter*), `array` A list of metadata filter conditions. - `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details. From 13299197b8e176a9f98e701062a2d6bc62def738 Mon Sep 17 00:00:00 2001 From: balibabu Date: Fri, 21 Nov 2025 16:21:27 +0800 Subject: [PATCH 11/12] Feat: Enable logical operators in metadata. #11387 #11376 (#11442) ### What problem does this PR solve? Feat: Enable logical operators in metadata.
#11387 #11376 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- web/src/components/metadata-filter/index.tsx | 1 + .../metadata-filter-conditions.tsx | 162 ++++++++++-------- web/src/constants/agent.tsx | 5 + .../hooks/logic-hooks/use-build-options.ts | 12 ++ web/src/pages/agent/constant/index.tsx | 5 +- .../pages/agent/form/switch-form/index.tsx | 14 +- 6 files changed, 119 insertions(+), 80 deletions(-) create mode 100644 web/src/hooks/logic-hooks/use-build-options.ts diff --git a/web/src/components/metadata-filter/index.tsx b/web/src/components/metadata-filter/index.tsx index 8dbdce42f..48388e4c2 100644 --- a/web/src/components/metadata-filter/index.tsx +++ b/web/src/components/metadata-filter/index.tsx @@ -14,6 +14,7 @@ type MetadataFilterProps = { export const MetadataFilterSchema = { meta_data_filter: z .object({ + logic: z.string().optional(), method: z.string().optional(), manual: z .array( diff --git a/web/src/components/metadata-filter/metadata-filter-conditions.tsx b/web/src/components/metadata-filter/metadata-filter-conditions.tsx index 80cb6409b..aee103a1f 100644 --- a/web/src/components/metadata-filter/metadata-filter-conditions.tsx +++ b/web/src/components/metadata-filter/metadata-filter-conditions.tsx @@ -15,14 +15,17 @@ import { } from '@/components/ui/form'; import { Input } from '@/components/ui/input'; import { Separator } from '@/components/ui/separator'; -import { SwitchOperatorOptions } from '@/constants/agent'; +import { SwitchLogicOperator, SwitchOperatorOptions } from '@/constants/agent'; import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options'; +import { useBuildSwitchLogicOperatorOptions } from '@/hooks/logic-hooks/use-build-options'; import { useFetchKnowledgeMetadata } from '@/hooks/use-knowledge-request'; import { PromptEditor } from '@/pages/agent/form/components/prompt-editor'; import { Plus, X } from 'lucide-react'; import { useCallback } from 'react'; 
import { useFieldArray, useFormContext } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; +import { RAGFlowFormItem } from '../ragflow-form'; +import { RAGFlowSelect } from '../ui/select'; export function MetadataFilterConditions({ kbIds, @@ -36,10 +39,13 @@ export function MetadataFilterConditions({ const { t } = useTranslation(); const form = useFormContext(); const name = prefix + 'meta_data_filter.manual'; + const logic = prefix + 'meta_data_filter.logic'; const metadata = useFetchKnowledgeMetadata(kbIds); const switchOperatorOptions = useBuildSwitchOperatorOptions(); + const switchLogicOperatorOptions = useBuildSwitchLogicOperatorOptions(); + const { fields, remove, append } = useFieldArray({ name, control: form.control, @@ -47,13 +53,14 @@ export function MetadataFilterConditions({ const add = useCallback( (key: string) => () => { + form.setValue(logic, SwitchLogicOperator.And); append({ key, value: '', op: SwitchOperatorOptions[0].value, }); }, - [append], + [append, form, logic], ); return ( @@ -77,73 +84,92 @@ export function MetadataFilterConditions({
-
- {fields.map((field, index) => { - const typeField = `${name}.${index}.key`; - return ( -
- ( - - - - - - - )} - /> - - ( - - - - - - - )} - /> - - ( - - - {canReference ? ( - - ) : ( - +
+ {fields.length > 1 && ( +
+ + + +
+
+ )} +
+ {fields.map((field, index) => { + const typeField = `${name}.${index}.key`; + return ( +
+
+
+ ( + + + + + + )} - - - - )} - /> - -
- ); - })} -
+ /> + + ( + + + + + + + )} + /> +
+ ( + + + {canReference ? ( + + ) : ( + + )} + + + + )} + /> +
+ + + ); + })} +
+ ); } diff --git a/web/src/constants/agent.tsx b/web/src/constants/agent.tsx index 0ba1d927c..5877b91b1 100644 --- a/web/src/constants/agent.tsx +++ b/web/src/constants/agent.tsx @@ -179,3 +179,8 @@ export enum JsonSchemaDataType { Array = 'array', Object = 'object', } + +export enum SwitchLogicOperator { + And = 'and', + Or = 'or', +} diff --git a/web/src/hooks/logic-hooks/use-build-options.ts b/web/src/hooks/logic-hooks/use-build-options.ts new file mode 100644 index 000000000..62370e9bd --- /dev/null +++ b/web/src/hooks/logic-hooks/use-build-options.ts @@ -0,0 +1,12 @@ +import { SwitchLogicOperator } from '@/constants/agent'; +import { buildOptions } from '@/utils/form'; +import { useTranslation } from 'react-i18next'; + +export function useBuildSwitchLogicOperatorOptions() { + const { t } = useTranslation(); + return buildOptions( + SwitchLogicOperator, + t, + 'flow.switchLogicOperatorOptions', + ); +} diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index c357e8cb7..4a442271b 100644 --- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -10,6 +10,7 @@ import { JsonSchemaDataType, Operator, ProgrammingLanguage, + SwitchLogicOperator, SwitchOperatorOptions, initialLlmBaseValues, } from '@/constants/agent'; @@ -51,8 +52,6 @@ import { export const BeginId = 'begin'; -export const SwitchLogicOperatorOptions = ['and', 'or']; - export const CommonOperatorList = Object.values(Operator).filter( (x) => x !== Operator.Note, ); @@ -308,7 +307,7 @@ export const initialExeSqlValues = { export const initialSwitchValues = { conditions: [ { - logical_operator: SwitchLogicOperatorOptions[0], + logical_operator: SwitchLogicOperator.And, items: [ { operator: SwitchOperatorOptions[0].value, diff --git a/web/src/pages/agent/form/switch-form/index.tsx b/web/src/pages/agent/form/switch-form/index.tsx index f9ccee919..53f4995af 100644 --- a/web/src/pages/agent/form/switch-form/index.tsx +++ 
b/web/src/pages/agent/form/switch-form/index.tsx @@ -11,16 +11,17 @@ import { import { RAGFlowSelect } from '@/components/ui/select'; import { Separator } from '@/components/ui/separator'; import { Textarea } from '@/components/ui/textarea'; +import { SwitchLogicOperator } from '@/constants/agent'; import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options'; +import { useBuildSwitchLogicOperatorOptions } from '@/hooks/logic-hooks/use-build-options'; import { cn } from '@/lib/utils'; import { zodResolver } from '@hookform/resolvers/zod'; import { t } from 'i18next'; import { X } from 'lucide-react'; -import { memo, useCallback, useMemo } from 'react'; +import { memo, useCallback } from 'react'; import { useFieldArray, useForm, useFormContext } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { z } from 'zod'; -import { SwitchLogicOperatorOptions } from '../../constant'; import { IOperatorForm } from '../../interface'; import { FormWrapper } from '../components/form-wrapper'; import { QueryVariable } from '../components/query-variable'; @@ -185,12 +186,7 @@ function SwitchForm({ node }: IOperatorForm) { control: form.control, }); - const switchLogicOperatorOptions = useMemo(() => { - return SwitchLogicOperatorOptions.map((x) => ({ - value: x, - label: t(`flow.switchLogicOperatorOptions.${x}`), - })); - }, [t]); + const switchLogicOperatorOptions = useBuildSwitchLogicOperatorOptions(); useWatchFormChange(node?.id, form); @@ -253,7 +249,7 @@ function SwitchForm({ node }: IOperatorForm) { append({ - logical_operator: SwitchLogicOperatorOptions[0], + logical_operator: SwitchLogicOperator.And, [ItemKey]: [ { operator: switchOperatorOptions[0].value, From a0959b9d38328e104ffeb7fe54945339095d55c1 Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Fri, 21 Nov 2025 17:20:26 +0800 Subject: [PATCH 12/12] Fix:Resolves the issue of sessions not being saved when the variable is array. 
(#11446) ### What problem does this PR solve? Fix:Resolves the issue of sessions not being saved when the variable is array. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/components/dynamic-form.tsx | 9 +++-- web/src/locales/en.ts | 2 +- web/src/locales/zh.ts | 2 +- .../agent/gobal-variable-sheet/constant.ts | 2 +- .../hooks/use-object-fields.tsx | 33 ++++++++++++++++--- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/web/src/components/dynamic-form.tsx b/web/src/components/dynamic-form.tsx index c74e4c381..ca0b08763 100644 --- a/web/src/components/dynamic-form.tsx +++ b/web/src/components/dynamic-form.tsx @@ -356,6 +356,13 @@ const DynamicForm = { ...combinedErrors, ...fieldErrors, } as any; + + console.log('combinedErrors', combinedErrors); + for (const key in combinedErrors) { + if (Array.isArray(combinedErrors[key])) { + combinedErrors[key] = combinedErrors[key][0]; + } + } console.log('combinedErrors', combinedErrors); return { values: Object.keys(combinedErrors).length ? 
{} : data, @@ -720,9 +727,7 @@ const DynamicForm = { type="button" disabled={submitLoading} onClick={() => { - console.log('form submit'); (async () => { - console.log('form submit2'); try { let beValid = await form.formControl.trigger(); console.log('form valid', beValid, form, form.formControl); diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 8a3488cee..8ab8e17b3 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1046,7 +1046,7 @@ Example: https://fsn1.your-objectstorage.com`, downloadFileType: 'Download file type', formatTypeError: 'Format or type error', variableNameMessage: - 'Variable name can only contain letters and underscores', + 'Variable name can only contain letters and underscores and numbers', variableDescription: 'Variable Description', defaultValue: 'Default Value', conversationVariable: 'Conversation variable', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 375b2fadc..70a78c825 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -980,7 +980,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 downloadFileTypeTip: '文件下载的类型', downloadFileType: '文件类型', formatTypeError: '格式或类型错误', - variableNameMessage: '名称只能包含字母和下划线', + variableNameMessage: '名称只能包含字母,数字和下划线', variableDescription: '变量的描述', defaultValue: '默认值', conversationVariable: '会话变量', diff --git a/web/src/pages/agent/gobal-variable-sheet/constant.ts b/web/src/pages/agent/gobal-variable-sheet/constant.ts index 935540c15..8470ffa86 100644 --- a/web/src/pages/agent/gobal-variable-sheet/constant.ts +++ b/web/src/pages/agent/gobal-variable-sheet/constant.ts @@ -18,7 +18,7 @@ export const GlobalFormFields = [ placeholder: t('common.namePlaceholder'), required: true, validation: { - pattern: /^[a-zA-Z_]+$/, + pattern: /^[a-zA-Z_0-9]+$/, message: t('flow.variableNameMessage'), }, type: FormFieldType.Text, diff --git a/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx 
b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx index 7e60a7aec..c41e766f2 100644 --- a/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx +++ b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx @@ -3,6 +3,7 @@ import { BlockButton, Button } from '@/components/ui/button'; import { Input } from '@/components/ui/input'; import { Segmented } from '@/components/ui/segmented'; import { t } from 'i18next'; +import { isEmpty } from 'lodash'; import { Trash2, X } from 'lucide-react'; import { useCallback } from 'react'; import { FieldValues } from 'react-hook-form'; @@ -36,14 +37,19 @@ export const useObjectFields = () => { path: (string | number)[] = [], ): Array<{ path: (string | number)[]; message: string }> => { const errors: Array<{ path: (string | number)[]; message: string }> = []; - - if (obj !== null && typeof obj === 'object' && !Array.isArray(obj)) { + if (typeof obj === 'object' && !Array.isArray(obj)) { + if (isEmpty(obj)) { + errors.push({ + path: [...path], + message: 'No empty parameters are allowed.', + }); + } for (const key in obj) { if (obj.hasOwnProperty(key)) { - if (!/^[a-zA-Z_]+$/.test(key)) { + if (!/^[a-zA-Z_0-9]+$/.test(key)) { errors.push({ path: [...path, key], - message: `Key "${key}" is invalid. Keys can only contain letters and underscores.`, + message: `Key "${key}" is invalid. 
Keys can only contain letters and underscores and numbers.`, }); } const nestedErrors = validateKeys(obj[key], [...path, key]); @@ -108,6 +114,21 @@ export const useObjectFields = () => { } }, []); + const arrayObjectValidate = useCallback((value: any) => { + try { + if (validateKeys(value, [])?.length > 0) { + throw new Error(t('flow.formatTypeError')); + } + if (value && typeof value === 'string' && !JSON.parse(value)) { + throw new Error(t('flow.formatTypeError')); + } + return true; + } catch (e) { + console.log('object-render-error', e, value); + throw new Error(t('flow.formatTypeError')); + } + }, []); + const arrayStringRender = useCallback((field: FieldValues, type = 'text') => { const values = Array.isArray(field.value) ? field.value @@ -253,8 +274,9 @@ export const useObjectFields = () => { const handleCustomValidate = (value: TypesWithArray) => { switch (value) { case TypesWithArray.Object: - case TypesWithArray.ArrayObject: return objectValidate; + case TypesWithArray.ArrayObject: + return arrayObjectValidate; case TypesWithArray.ArrayString: return arrayStringValidate; case TypesWithArray.ArrayNumber: @@ -284,6 +306,7 @@ export const useObjectFields = () => { return { objectRender, objectValidate, + arrayObjectValidate, arrayStringRender, arrayStringValidate, arrayNumberRender,