diff --git a/README.md b/README.md index d82721d98..ded81f099 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io). ## 🔥 Latest Updates - 2025-11-19 Supports Gemini 3 Pro. -- 2025-11-12 Supports data synchronization from Confluence, AWS S3, Discord, Google Drive. +- 2025-11-12 Supports data synchronization from Confluence, S3, Notion, Discord, Google Drive. - 2025-10-23 Supports MinerU & Docling as document parsing methods. - 2025-10-15 Supports orchestrable ingestion pipeline. - 2025-08-08 Supports OpenAI's latest GPT-5 series models. diff --git a/README_id.md b/README_id.md index 953fce4c5..11b09b4fb 100644 --- a/README_id.md +++ b/README_id.md @@ -86,7 +86,7 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io). ## 🔥 Pembaruan Terbaru - 2025-11-19 Mendukung Gemini 3 Pro. -- 2025-11-12 Mendukung sinkronisasi data dari Confluence, AWS S3, Discord, Google Drive. +- 2025-11-12 Mendukung sinkronisasi data dari Confluence, S3, Notion, Discord, Google Drive. - 2025-10-23 Mendukung MinerU & Docling sebagai metode penguraian dokumen. - 2025-10-15 Dukungan untuk jalur data yang terorkestrasi. - 2025-08-08 Mendukung model seri GPT-5 terbaru dari OpenAI. diff --git a/README_ja.md b/README_ja.md index 7711d3ff0..5e471b5c2 100644 --- a/README_ja.md +++ b/README_ja.md @@ -67,7 +67,7 @@ ## 🔥 最新情報 - 2025-11-19 Gemini 3 Proをサポートしています -- 2025-11-12 Confluence、AWS S3、Discord、Google Drive からのデータ同期をサポートします。 +- 2025-11-12 Confluence、S3、Notion、Discord、Google Drive からのデータ同期をサポートします。 - 2025-10-23 ドキュメント解析方法として MinerU と Docling をサポートします。 - 2025-10-15 オーケストレーションされたデータパイプラインのサポート。 - 2025-08-08 OpenAI の最新 GPT-5 シリーズモデルをサポートします。 diff --git a/README_ko.md b/README_ko.md index 386fd2faa..f34f23279 100644 --- a/README_ko.md +++ b/README_ko.md @@ -68,7 +68,7 @@ ## 🔥 업데이트 - 2025-11-19 Gemini 3 Pro를 지원합니다. -- 2025-11-12 Confluence, AWS S3, Discord, Google Drive에서 데이터 동기화를 지원합니다. +- 2025-11-12 Confluence, S3, Notion, Discord, Google Drive에서 데이터 동기화를 지원합니다. - 2025-10-23 문서 파싱 방법으로 MinerU 및 Docling을 지원합니다. - 2025-10-15 조정된 데이터 파이프라인 지원. - 2025-08-08 OpenAI의 최신 GPT-5 시리즈 모델을 지원합니다. diff --git a/README_pt_br.md b/README_pt_br.md index 487ec5530..71690ebb9 100644 --- a/README_pt_br.md +++ b/README_pt_br.md @@ -87,7 +87,7 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io). ## 🔥 Últimas Atualizações - 19-11-2025 Suporta Gemini 3 Pro. -- 12-11-2025 Suporta a sincronização de dados do Confluence, AWS S3, Discord e Google Drive. +- 12-11-2025 Suporta a sincronização de dados do Confluence, S3, Notion, Discord e Google Drive. - 23-10-2025 Suporta MinerU e Docling como métodos de análise de documentos. - 15-10-2025 Suporte para pipelines de dados orquestrados. - 08-08-2025 Suporta a mais recente série GPT-5 da OpenAI. diff --git a/README_tzh.md b/README_tzh.md index eab5938e4..7756aacc8 100644 --- a/README_tzh.md +++ b/README_tzh.md @@ -86,7 +86,7 @@ ## 🔥 近期更新 - 2025-11-19 支援 Gemini 3 Pro. -- 2025-11-12 支援從 Confluence、AWS S3、Discord、Google Drive 進行資料同步。 +- 2025-11-12 支援從 Confluence、S3、Notion、Discord、Google Drive 進行資料同步。 - 2025-10-23 支援 MinerU 和 Docling 作為文件解析方法。 - 2025-10-15 支援可編排的資料管道。 - 2025-08-08 支援 OpenAI 最新的 GPT-5 系列模型。 diff --git a/README_zh.md b/README_zh.md index 58394b5fd..799c3aaea 100644 --- a/README_zh.md +++ b/README_zh.md @@ -86,7 +86,7 @@ ## 🔥 近期更新 - 2025-11-19 支持 Gemini 3 Pro. -- 2025-11-12 支持从 Confluence、AWS S3、Discord、Google Drive 进行数据同步。 +- 2025-11-12 支持从 Confluence、S3、Notion、Discord、Google Drive 进行数据同步。 - 2025-10-23 支持 MinerU 和 Docling 作为文档解析方法。 - 2025-10-15 支持可编排的数据管道。 - 2025-08-08 支持 OpenAI 最新的 GPT-5 系列模型。 diff --git a/agent/component/iteration.py b/agent/component/iteration.py index cff09d622..ae5c0b677 100644 --- a/agent/component/iteration.py +++ b/agent/component/iteration.py @@ -32,7 +32,7 @@ class IterationParam(ComponentParamBase): def __init__(self): super().__init__() self.items_ref = "" - self.veriable={} + self.variable={} def get_input_form(self) -> dict[str, dict]: return { diff --git a/agent/tools/retrieval.py b/agent/tools/retrieval.py index c3a01e517..fd9096cb6 100644 --- a/agent/tools/retrieval.py +++ b/agent/tools/retrieval.py @@ -137,7 +137,7 @@ class Retrieval(ToolBase, ABC): if not doc_ids: doc_ids = None elif self._param.meta_data_filter.get("method") == "manual": - filters=self._param.meta_data_filter["manual"] + filters = self._param.meta_data_filter["manual"] for flt in filters: pat = re.compile(self.variable_ref_patt) s = flt["value"] @@ -166,8 +166,8 @@ class Retrieval(ToolBase, ABC): out_parts.append(s[last:]) flt["value"] = "".join(out_parts) doc_ids.extend(meta_filter(metas, filters, self._param.meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if filters and not doc_ids: + doc_ids = ["-999"] if self._param.cross_languages: query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages) diff --git a/api/apps/__init__.py b/api/apps/__init__.py index a53f67c06..a6e33c13b 100644 --- a/api/apps/__init__.py +++ b/api/apps/__init__.py @@ -24,7 +24,7 @@ from flasgger import Swagger from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer from quart_cors import cors from common.constants import StatusEnum -from api.db.db_models import close_connection +from api.db.db_models import close_connection, APIToken from api.db.services import UserService from api.utils.json_encode import CustomJSONEncoder from api.utils import commands @@ -124,6 +124,10 @@ def _load_user(): user = UserService.query( access_token=access_token, status=StatusEnum.VALID.value ) + if not user and len(authorization.split()) == 2: + objs = APIToken.query(token=authorization.split()[1]) + if objs: + user = UserService.query(id=objs[0].tenant_id, status=StatusEnum.VALID.value) if user: if not user[0].access_token or not user[0].access_token.strip(): logging.warning(f"User {user[0].email} has empty access_token in database") diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py index e121bcba7..b43fb9af1 100644 --- a/api/apps/chunk_app.py +++ b/api/apps/chunk_app.py @@ -311,8 +311,8 @@ async def retrieval_test(): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] try: tenants = UserTenantService.query(user_id=current_user.id) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 84300ac3c..52acebc43 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1434,6 +1434,7 @@ async def retrieval_test(tenant_id): question = req["question"] doc_ids = req.get("document_ids", []) use_kg = req.get("use_kg", False) + toc_enhance = req.get("toc_enhance", False) langs = req.get("cross_languages", []) if not isinstance(doc_ids, list): return get_error_data_result("`documents` should be a list") @@ -1445,6 +1446,8 @@ async def retrieval_test(tenant_id): metadata_condition = req.get("metadata_condition", {}) or {} metas = DocumentService.get_meta_by_kbs(kb_ids) doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")) + if metadata_condition and not doc_ids: + doc_ids = ["-999"] similarity_threshold = float(req.get("similarity_threshold", 0.2)) vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3)) top = int(req.get("top_k", 1024)) @@ -1485,6 +1488,11 @@ async def retrieval_test(tenant_id): highlight=highlight, rank_feature=label_question(question, kbs), ) + if toc_enhance: + chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT) + cks = settings.retriever.retrieval_by_toc(question, ranks["chunks"], tenant_ids, chat_mdl, size) + if cks: + ranks["chunks"] = cks if use_kg: ck = settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT)) if ck["content_with_weight"]: diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index 533375622..074401ede 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -446,8 +446,8 @@ async def agent_completions(tenant_id, agent_id): if req.get("stream", True): - def generate(): - for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): + async def generate(): + async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): if isinstance(answer, str): try: ans = json.loads(answer[5:]) # remove "data:" @@ -471,7 +471,7 @@ async def agent_completions(tenant_id, agent_id): full_content = "" reference = {} final_ans = "" - for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): + async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req): try: ans = json.loads(answer[5:]) @@ -873,7 +873,7 @@ async def agent_bot_completions(agent_id): resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") return resp - for answer in agent_completion(objs[0].tenant_id, agent_id, **req): + async for answer in agent_completion(objs[0].tenant_id, agent_id, **req): return get_result(data=answer) @@ -981,8 +981,8 @@ async def retrieval_test_embedded(): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] try: tenants = UserTenantService.query(user_id=tenant_id) diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index db878574d..0a09ea532 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -415,9 +415,10 @@ def chat(dialog, messages, stream=True, **kwargs): if not attachments: attachments = None elif dialog.meta_data_filter.get("method") == "manual": - attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"], dialog.meta_data_filter.get("logic", "and"))) - if not attachments: - attachments = None + conds = dialog.meta_data_filter["manual"] + attachments.extend(meta_filter(metas, conds, dialog.meta_data_filter.get("logic", "and"))) + if conds and not attachments: + attachments = ["-999"] if prompt_config.get("keyword", False): questions[-1] += keyword_extraction(chat_mdl, questions[-1]) @@ -787,8 +788,8 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] kbinfos = retriever.retrieval( question=question, @@ -862,8 +863,8 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}): doc_ids = None elif meta_data_filter.get("method") == "manual": doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) - if not doc_ids: - doc_ids = None + if meta_data_filter["manual"] and not doc_ids: + doc_ids = ["-999"] ranks = settings.retriever.retrieval( question=question, diff --git a/common/data_source/notion_connector.py b/common/data_source/notion_connector.py index 8c6a522ad..e29bbbe76 100644 --- a/common/data_source/notion_connector.py +++ b/common/data_source/notion_connector.py @@ -1,38 +1,45 @@ +import html import logging from collections.abc import Generator +from datetime import datetime, timezone +from pathlib import Path from typing import Any, Optional +from urllib.parse import urlparse + from retry import retry from common.data_source.config import ( INDEX_BATCH_SIZE, - DocumentSource, NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP + NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP, + DocumentSource, +) +from common.data_source.exceptions import ( + ConnectorMissingCredentialError, + ConnectorValidationError, + CredentialExpiredError, + InsufficientPermissionsError, + UnexpectedValidationError, ) from common.data_source.interfaces import ( LoadConnector, PollConnector, - SecondsSinceUnixEpoch + SecondsSinceUnixEpoch, ) from common.data_source.models import ( Document, - TextSection, GenerateDocumentsOutput -) -from common.data_source.exceptions import ( - ConnectorValidationError, - CredentialExpiredError, - InsufficientPermissionsError, - UnexpectedValidationError, ConnectorMissingCredentialError -) -from common.data_source.models import ( - NotionPage, + GenerateDocumentsOutput, NotionBlock, - NotionSearchResponse + NotionPage, + NotionSearchResponse, + TextSection, ) from common.data_source.utils import ( - rl_requests, batch_generator, + datetime_from_string, fetch_notion_data, + filter_pages_by_time, properties_to_str, - filter_pages_by_time, datetime_from_string + rl_requests, ) @@ -61,11 +68,9 @@ class NotionConnector(LoadConnector, PollConnector): self.recursive_index_enabled = recursive_index_enabled or bool(root_page_id) @retry(tries=3, delay=1, backoff=2) - def _fetch_child_blocks( - self, block_id: str, cursor: Optional[str] = None - ) -> dict[str, Any] | None: + def _fetch_child_blocks(self, block_id: str, cursor: Optional[str] = None) -> dict[str, Any] | None: """Fetch all child blocks via the Notion API.""" - logging.debug(f"Fetching children of block with ID '{block_id}'") + logging.debug(f"[Notion]: Fetching children of block with ID {block_id}") block_url = f"https://api.notion.com/v1/blocks/{block_id}/children" query_params = {"start_cursor": cursor} if cursor else None @@ -79,49 +84,42 @@ class NotionConnector(LoadConnector, PollConnector): response.raise_for_status() return response.json() except Exception as e: - if hasattr(e, 'response') and e.response.status_code == 404: - logging.error( - f"Unable to access block with ID '{block_id}'. " - f"This is likely due to the block not being shared with the integration." - ) + if hasattr(e, "response") and e.response.status_code == 404: + logging.error(f"[Notion]: Unable to access block with ID {block_id}. This is likely due to the block not being shared with the integration.") return None else: - logging.exception(f"Error fetching blocks: {e}") + logging.exception(f"[Notion]: Error fetching blocks: {e}") raise @retry(tries=3, delay=1, backoff=2) def _fetch_page(self, page_id: str) -> NotionPage: """Fetch a page from its ID via the Notion API.""" - logging.debug(f"Fetching page for ID '{page_id}'") + logging.debug(f"[Notion]: Fetching page for ID {page_id}") page_url = f"https://api.notion.com/v1/pages/{page_id}" try: data = fetch_notion_data(page_url, self.headers, "GET") return NotionPage(**data) except Exception as e: - logging.warning(f"Failed to fetch page, trying database for ID '{page_id}': {e}") + logging.warning(f"[Notion]: Failed to fetch page, trying database for ID {page_id}: {e}") return self._fetch_database_as_page(page_id) @retry(tries=3, delay=1, backoff=2) def _fetch_database_as_page(self, database_id: str) -> NotionPage: """Attempt to fetch a database as a page.""" - logging.debug(f"Fetching database for ID '{database_id}' as a page") + logging.debug(f"[Notion]: Fetching database for ID {database_id} as a page") database_url = f"https://api.notion.com/v1/databases/{database_id}" data = fetch_notion_data(database_url, self.headers, "GET") database_name = data.get("title") - database_name = ( - database_name[0].get("text", {}).get("content") if database_name else None - ) + database_name = database_name[0].get("text", {}).get("content") if database_name else None return NotionPage(**data, database_name=database_name) @retry(tries=3, delay=1, backoff=2) - def _fetch_database( - self, database_id: str, cursor: Optional[str] = None - ) -> dict[str, Any]: + def _fetch_database(self, database_id: str, cursor: Optional[str] = None) -> dict[str, Any]: """Fetch a database from its ID via the Notion API.""" - logging.debug(f"Fetching database for ID '{database_id}'") + logging.debug(f"[Notion]: Fetching database for ID {database_id}") block_url = f"https://api.notion.com/v1/databases/{database_id}/query" body = {"start_cursor": cursor} if cursor else None @@ -129,17 +127,12 @@ class NotionConnector(LoadConnector, PollConnector): data = fetch_notion_data(block_url, self.headers, "POST", body) return data except Exception as e: - if hasattr(e, 'response') and e.response.status_code in [404, 400]: - logging.error( - f"Unable to access database with ID '{database_id}'. " - f"This is likely due to the database not being shared with the integration." - ) + if hasattr(e, "response") and e.response.status_code in [404, 400]: + logging.error(f"[Notion]: Unable to access database with ID {database_id}. This is likely due to the database not being shared with the integration.") return {"results": [], "next_cursor": None} raise - def _read_pages_from_database( - self, database_id: str - ) -> tuple[list[NotionBlock], list[str]]: + def _read_pages_from_database(self, database_id: str) -> tuple[list[NotionBlock], list[str]]: """Returns a list of top level blocks and all page IDs in the database.""" result_blocks: list[NotionBlock] = [] result_pages: list[str] = [] @@ -158,10 +151,10 @@ class NotionConnector(LoadConnector, PollConnector): if self.recursive_index_enabled: if obj_type == "page": - logging.debug(f"Found page with ID '{obj_id}' in database '{database_id}'") + logging.debug(f"[Notion]: Found page with ID {obj_id} in database {database_id}") result_pages.append(result["id"]) elif obj_type == "database": - logging.debug(f"Found database with ID '{obj_id}' in database '{database_id}'") + logging.debug(f"[Notion]: Found database with ID {obj_id} in database {database_id}") _, child_pages = self._read_pages_from_database(obj_id) result_pages.extend(child_pages) @@ -172,44 +165,229 @@ class NotionConnector(LoadConnector, PollConnector): return result_blocks, result_pages - def _read_blocks(self, base_block_id: str) -> tuple[list[NotionBlock], list[str]]: - """Reads all child blocks for the specified block, returns blocks and child page ids.""" + def _extract_rich_text(self, rich_text_array: list[dict[str, Any]]) -> str: + collected_text: list[str] = [] + for rich_text in rich_text_array: + content = "" + r_type = rich_text.get("type") + + if r_type == "equation": + expr = rich_text.get("equation", {}).get("expression") + if expr: + content = expr + elif r_type == "mention": + mention = rich_text.get("mention", {}) or {} + mention_type = mention.get("type") + mention_value = mention.get(mention_type, {}) if mention_type else {} + if mention_type == "date": + start = mention_value.get("start") + end = mention_value.get("end") + if start and end: + content = f"{start} - {end}" + elif start: + content = start + elif mention_type in {"page", "database"}: + content = mention_value.get("id", rich_text.get("plain_text", "")) + elif mention_type == "link_preview": + content = mention_value.get("url", rich_text.get("plain_text", "")) + else: + content = rich_text.get("plain_text", "") or str(mention_value) + else: + if rich_text.get("plain_text"): + content = rich_text["plain_text"] + elif "text" in rich_text and rich_text["text"].get("content"): + content = rich_text["text"]["content"] + + href = rich_text.get("href") + if content and href: + content = f"{content} ({href})" + + if content: + collected_text.append(content) + + return "".join(collected_text).strip() + + def _build_table_html(self, table_block_id: str) -> str | None: + rows: list[str] = [] + cursor = None + while True: + data = self._fetch_child_blocks(table_block_id, cursor) + if data is None: + break + + for result in data["results"]: + if result.get("type") != "table_row": + continue + cells_html: list[str] = [] + for cell in result["table_row"].get("cells", []): + cell_text = self._extract_rich_text(cell) + cell_html = html.escape(cell_text) if cell_text else "" + cells_html.append(f"{cell_html}") + rows.append(f"{''.join(cells_html)}") + + if data.get("next_cursor") is None: + break + cursor = data["next_cursor"] + + if not rows: + return None + return "\n" + "\n".join(rows) + "\n
" + + def _download_file(self, url: str) -> bytes | None: + try: + response = rl_requests.get(url, timeout=60) + response.raise_for_status() + return response.content + except Exception as exc: + logging.warning(f"[Notion]: Failed to download Notion file from {url}: {exc}") + return None + + def _extract_file_metadata(self, result_obj: dict[str, Any], block_id: str) -> tuple[str | None, str, str | None]: + file_source_type = result_obj.get("type") + file_source = result_obj.get(file_source_type, {}) if file_source_type else {} + url = file_source.get("url") + + name = result_obj.get("name") or file_source.get("name") + if url and not name: + parsed_name = Path(urlparse(url).path).name + name = parsed_name or f"notion_file_{block_id}" + elif not name: + name = f"notion_file_{block_id}" + + caption = self._extract_rich_text(result_obj.get("caption", [])) if "caption" in result_obj else None + + return url, name, caption + + def _build_attachment_document( + self, + block_id: str, + url: str, + name: str, + caption: Optional[str], + page_last_edited_time: Optional[str], + ) -> Document | None: + file_bytes = self._download_file(url) + if file_bytes is None: + return None + + extension = Path(name).suffix or Path(urlparse(url).path).suffix or ".bin" + if extension and not extension.startswith("."): + extension = f".{extension}" + if not extension: + extension = ".bin" + + updated_at = datetime_from_string(page_last_edited_time) if page_last_edited_time else datetime.now(timezone.utc) + semantic_identifier = caption or name or f"Notion file {block_id}" + + return Document( + id=block_id, + blob=file_bytes, + source=DocumentSource.NOTION, + semantic_identifier=semantic_identifier, + extension=extension, + size_bytes=len(file_bytes), + doc_updated_at=updated_at, + ) + + def _read_blocks(self, base_block_id: str, page_last_edited_time: Optional[str] = None) -> tuple[list[NotionBlock], list[str], list[Document]]: result_blocks: list[NotionBlock] = [] child_pages: list[str] = [] + attachments: list[Document] = [] cursor = None while True: data = self._fetch_child_blocks(base_block_id, cursor) if data is None: - return result_blocks, child_pages + return result_blocks, child_pages, attachments for result in data["results"]: - logging.debug(f"Found child block for block with ID '{base_block_id}': {result}") + logging.debug(f"[Notion]: Found child block for block with ID {base_block_id}: {result}") result_block_id = result["id"] result_type = result["type"] result_obj = result[result_type] if result_type in ["ai_block", "unsupported", "external_object_instance_page"]: - logging.warning(f"Skipping unsupported block type '{result_type}'") + logging.warning(f"[Notion]: Skipping unsupported block type {result_type}") + continue + + if result_type == "table": + table_html = self._build_table_html(result_block_id) + if table_html: + result_blocks.append( + NotionBlock( + id=result_block_id, + text=table_html, + prefix="\n\n", + ) + ) + continue + + if result_type == "equation": + expr = result_obj.get("expression") + if expr: + result_blocks.append( + NotionBlock( + id=result_block_id, + text=expr, + prefix="\n", + ) + ) continue cur_result_text_arr = [] if "rich_text" in result_obj: - for rich_text in result_obj["rich_text"]: - if "text" in rich_text: - text = rich_text["text"]["content"] - cur_result_text_arr.append(text) + text = self._extract_rich_text(result_obj["rich_text"]) + if text: + cur_result_text_arr.append(text) + + if result_type == "bulleted_list_item": + if cur_result_text_arr: + cur_result_text_arr[0] = f"- {cur_result_text_arr[0]}" + else: + cur_result_text_arr = ["- "] + + if result_type == "numbered_list_item": + if cur_result_text_arr: + cur_result_text_arr[0] = f"1. {cur_result_text_arr[0]}" + else: + cur_result_text_arr = ["1. "] + + if result_type == "to_do": + checked = result_obj.get("checked") + checkbox_prefix = "[x]" if checked else "[ ]" + if cur_result_text_arr: + cur_result_text_arr = [f"{checkbox_prefix} {cur_result_text_arr[0]}"] + cur_result_text_arr[1:] + else: + cur_result_text_arr = [checkbox_prefix] + + if result_type in {"file", "image", "pdf", "video", "audio"}: + file_url, file_name, caption = self._extract_file_metadata(result_obj, result_block_id) + if file_url: + attachment_doc = self._build_attachment_document( + block_id=result_block_id, + url=file_url, + name=file_name, + caption=caption, + page_last_edited_time=page_last_edited_time, + ) + if attachment_doc: + attachments.append(attachment_doc) + + attachment_label = caption or file_name + if attachment_label: + cur_result_text_arr.append(f"{result_type.capitalize()}: {attachment_label}") if result["has_children"]: if result_type == "child_page": child_pages.append(result_block_id) else: - logging.debug(f"Entering sub-block: {result_block_id}") - subblocks, subblock_child_pages = self._read_blocks(result_block_id) - logging.debug(f"Finished sub-block: {result_block_id}") + logging.debug(f"[Notion]: Entering sub-block: {result_block_id}") + subblocks, subblock_child_pages, subblock_attachments = self._read_blocks(result_block_id, page_last_edited_time) + logging.debug(f"[Notion]: Finished sub-block: {result_block_id}") result_blocks.extend(subblocks) child_pages.extend(subblock_child_pages) + attachments.extend(subblock_attachments) if result_type == "child_database": inner_blocks, inner_child_pages = self._read_pages_from_database(result_block_id) @@ -231,7 +409,7 @@ class NotionConnector(LoadConnector, PollConnector): cursor = data["next_cursor"] - return result_blocks, child_pages + return result_blocks, child_pages, attachments def _read_page_title(self, page: NotionPage) -> Optional[str]: """Extracts the title from a Notion page.""" @@ -245,9 +423,7 @@ class NotionConnector(LoadConnector, PollConnector): return None - def _read_pages( - self, pages: list[NotionPage] - ) -> Generator[Document, None, None]: + def _read_pages(self, pages: list[NotionPage], start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[Document, None, None]: """Reads pages for rich text content and generates Documents.""" all_child_page_ids: list[str] = [] @@ -255,11 +431,17 @@ class NotionConnector(LoadConnector, PollConnector): if isinstance(page, dict): page = NotionPage(**page) if page.id in self.indexed_pages: - logging.debug(f"Already indexed page with ID '{page.id}'. Skipping.") + logging.debug(f"[Notion]: Already indexed page with ID {page.id}. Skipping.") continue - logging.info(f"Reading page with ID '{page.id}', with url {page.url}") - page_blocks, child_page_ids = self._read_blocks(page.id) + if start is not None and end is not None: + page_ts = datetime_from_string(page.last_edited_time).timestamp() + if not (page_ts > start and page_ts <= end): + logging.debug(f"[Notion]: Skipping page {page.id} outside polling window.") + continue + + logging.info(f"[Notion]: Reading page with ID {page.id}, with url {page.url}") + page_blocks, child_page_ids, attachment_docs = self._read_blocks(page.id, page.last_edited_time) all_child_page_ids.extend(child_page_ids) self.indexed_pages.add(page.id) @@ -268,14 +450,12 @@ class NotionConnector(LoadConnector, PollConnector): if not page_blocks: if not raw_page_title: - logging.warning(f"No blocks OR title found for page with ID '{page.id}'. Skipping.") + logging.warning(f"[Notion]: No blocks OR title found for page with ID {page.id}. Skipping.") continue text = page_title if page.properties: - text += "\n\n" + "\n".join( - [f"{key}: {value}" for key, value in page.properties.items()] - ) + text += "\n\n" + "\n".join([f"{key}: {value}" for key, value in page.properties.items()]) sections = [TextSection(link=page.url, text=text)] else: sections = [ @@ -286,45 +466,39 @@ class NotionConnector(LoadConnector, PollConnector): for block in page_blocks ] - blob = ("\n".join([sec.text for sec in sections])).encode("utf-8") + joined_text = "\n".join(sec.text for sec in sections) + blob = joined_text.encode("utf-8") yield Document( - id=page.id, - blob=blob, - source=DocumentSource.NOTION, - semantic_identifier=page_title, - extension=".txt", - size_bytes=len(blob), - doc_updated_at=datetime_from_string(page.last_edited_time) + id=page.id, blob=blob, source=DocumentSource.NOTION, semantic_identifier=page_title, extension=".txt", size_bytes=len(blob), doc_updated_at=datetime_from_string(page.last_edited_time) ) + for attachment_doc in attachment_docs: + yield attachment_doc + if self.recursive_index_enabled and all_child_page_ids: for child_page_batch_ids in batch_generator(all_child_page_ids, INDEX_BATCH_SIZE): - child_page_batch = [ - self._fetch_page(page_id) - for page_id in child_page_batch_ids - if page_id not in self.indexed_pages - ] - yield from self._read_pages(child_page_batch) + child_page_batch = [self._fetch_page(page_id) for page_id in child_page_batch_ids if page_id not in self.indexed_pages] + yield from self._read_pages(child_page_batch, start, end) @retry(tries=3, delay=1, backoff=2) def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse: """Search for pages from a Notion database.""" - logging.debug(f"Searching for pages in Notion with query_dict: {query_dict}") + logging.debug(f"[Notion]: Searching for pages in Notion with query_dict: {query_dict}") data = fetch_notion_data("https://api.notion.com/v1/search", self.headers, "POST", query_dict) return NotionSearchResponse(**data) - def _recursive_load(self) -> Generator[list[Document], None, None]: + def _recursive_load(self, start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[list[Document], None, None]: """Recursively load pages starting from root page ID.""" if self.root_page_id is None or not self.recursive_index_enabled: raise RuntimeError("Recursive page lookup is not enabled") - logging.info(f"Recursively loading pages from Notion based on root page with ID: {self.root_page_id}") + logging.info(f"[Notion]: Recursively loading pages from Notion based on root page with ID: {self.root_page_id}") pages = [self._fetch_page(page_id=self.root_page_id)] - yield from batch_generator(self._read_pages(pages), self.batch_size) + yield from batch_generator(self._read_pages(pages, start, end), self.batch_size) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: """Applies integration token to headers.""" - self.headers["Authorization"] = f'Bearer {credentials["notion_integration_token"]}' + self.headers["Authorization"] = f"Bearer {credentials['notion_integration_token']}" return None def load_from_state(self) -> GenerateDocumentsOutput: @@ -348,12 +522,10 @@ class NotionConnector(LoadConnector, PollConnector): else: break - def poll_source( - self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch - ) -> GenerateDocumentsOutput: + def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput: """Poll Notion for updated pages within a time period.""" if self.recursive_index_enabled and self.root_page_id: - yield from self._recursive_load() + yield from self._recursive_load(start, end) return query_dict = { @@ -367,7 +539,7 @@ class NotionConnector(LoadConnector, PollConnector): pages = filter_pages_by_time(db_res.results, start, end, "last_edited_time") if pages: - yield from batch_generator(self._read_pages(pages), self.batch_size) + yield from batch_generator(self._read_pages(pages, start, end), self.batch_size) if db_res.has_more: query_dict["start_cursor"] = db_res.next_cursor else: diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 6d8431c82..f6613c2f5 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -1091,7 +1091,7 @@ class RAGFlowPdfParser: logging.debug("Images converted.") self.is_english = [ - re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) + re.search(r"[ a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in range(len(self.page_chars)) ] if sum([1 if e else 0 for e in self.is_english]) > len(self.page_images) / 2: @@ -1148,7 +1148,7 @@ class RAGFlowPdfParser: if not self.is_english and not any([c for c in self.page_chars]) and self.boxes: bxes = [b for bxs in self.boxes for b in bxs] - self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))])) + self.is_english = re.search(r"[ \na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))])) logging.debug(f"Is it English: {self.is_english}") diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index bc1b15670..253745432 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -2072,6 +2072,7 @@ Retrieves chunks from specified datasets. - `"cross_languages"`: `list[string]` - `"metadata_condition"`: `object` - `"use_kg"`: `boolean` + - `"toc_enhance"`: `boolean` ##### Request example ```bash @@ -2122,6 +2123,8 @@ curl --request POST \ The number of chunks engaged in vector cosine computation. Defaults to `1024`. - `"use_kg"`: (*Body parameter*), `boolean` The search includes text chunks related to the knowledge graph of the selected dataset to handle complex multi-hop queries. Defaults to `False`. +- `"toc_enhance"`: (*Body parameter*), `boolean` + The search includes table of content enhancement in order to boost rank of relevant chunks. Files parsed with `TOC Enhance` enabled is prerequisite. Defaults to `False`. - `"rerank_id"`: (*Body parameter*), `integer` The ID of the rerank model. - `"keyword"`: (*Body parameter*), `boolean` @@ -2136,6 +2139,9 @@ curl --request POST \ The languages that should be translated into, in order to achieve keywords retrievals in different languages. - `"metadata_condition"`: (*Body parameter*), `object` The metadata condition used for filtering chunks: + - `"logic"`: (*Body parameter*), `string` + - `"and"` Intersection of the result from each condition (default). + - `"or"` union of the result from each condition. - `"conditions"`: (*Body parameter*), `array` A list of metadata filter conditions. - `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details. diff --git a/rag/app/book.py b/rag/app/book.py index 5ea28d40d..5bdaec72d 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -113,6 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/laws.py b/rag/app/laws.py index dd97e4e3a..ba2592833 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -172,6 +172,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/manual.py b/rag/app/manual.py index 124864041..b3a4ae38d 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -213,6 +213,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, parse_method = "manual", **kwargs ) diff --git a/rag/app/one.py b/rag/app/one.py index 5574aaa51..7cd1bb785 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -99,6 +99,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/app/presentation.py b/rag/app/presentation.py index cd1d308ec..6a872528f 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -142,6 +142,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang = lang, callback = callback, pdf_cls = Pdf, + layout_recognizer = layout_recognizer, **kwargs ) diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index f61019377..add454ade 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -437,16 +437,16 @@ def not_title(txt): return re.search(r"[,;,。;!!]", txt) def tree_merge(bull, sections, depth): - + if not sections or bull < 0: return sections if isinstance(sections[0], type("")): sections = [(s, "") for s in sections] - + # filter out position information in pdf sections sections = [(t, o) for t, o in sections if t and len(t.split("@")[0].strip()) > 1 and not re.match(r"[0-9]+$", t.split("@")[0].strip())] - + def get_level(bull, section): text, layout = section text = re.sub(r"\u3000", " ", text).strip() @@ -465,7 +465,7 @@ def tree_merge(bull, sections, depth): level, text = get_level(bull, section) if not text.strip("\n"): continue - + lines.append((level, text)) level_set.add(level) @@ -608,6 +608,26 @@ def naive_merge(sections: str | list, chunk_token_num=128, delimiter="\n。; cks[-1] += t tk_nums[-1] += tnum + custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)] + has_custom = bool(custom_delimiters) + if has_custom: + custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True)) + cks, tk_nums = [], [] + for sec, pos in sections: + split_sec = re.split(r"(%s)" % custom_pattern, sec, flags=re.DOTALL) + for sub_sec in split_sec: + if re.fullmatch(custom_pattern, sub_sec or ""): + continue + text = "\n" + sub_sec + local_pos = pos + if num_tokens_from_string(text) < 8: + local_pos = "" + if local_pos and text.find(local_pos) < 0: + text += local_pos + cks.append(text) + tk_nums.append(num_tokens_from_string(text)) + return cks + dels = get_delimiters(delimiter) for sec, pos in sections: if num_tokens_from_string(sec) < chunk_token_num: @@ -657,6 +677,29 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。 result_images[-1] = concat_img(result_images[-1], image) tk_nums[-1] += tnum + custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)] + has_custom = bool(custom_delimiters) + if has_custom: + custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True)) + cks, result_images, tk_nums = [], [], [] + for text, image in zip(texts, images): + text_str = text[0] if isinstance(text, tuple) else text + text_pos = text[1] if isinstance(text, tuple) and len(text) > 1 else "" + split_sec = re.split(r"(%s)" % custom_pattern, text_str) + for sub_sec in split_sec: + if re.fullmatch(custom_pattern, sub_sec or ""): + continue + text_seg = "\n" + sub_sec + local_pos = text_pos + if num_tokens_from_string(text_seg) < 8: + local_pos = "" + if local_pos and text_seg.find(local_pos) < 0: + text_seg += local_pos + cks.append(text_seg) + result_images.append(image) + tk_nums.append(num_tokens_from_string(text_seg)) + return cks, result_images + dels = get_delimiters(delimiter) for text, image in zip(texts, images): # if text is tuple, unpack it @@ -748,6 +791,23 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。;!?"): images[-1] = concat_img(images[-1], image) tk_nums[-1] += tnum + custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)] + has_custom = bool(custom_delimiters) + if has_custom: + custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True)) + cks, images, tk_nums = [], [], [] + pattern = r"(%s)" % custom_pattern + for sec, image in sections: + split_sec = re.split(pattern, sec) + for sub_sec in split_sec: + if not sub_sec or re.fullmatch(custom_pattern, sub_sec): + continue + text_seg = "\n" + sub_sec + cks.append(text_seg) + images.append(image) + tk_nums.append(num_tokens_from_string(text_seg)) + return cks, images + dels = get_delimiters(delimiter) pattern = r"(%s)" % dels @@ -789,7 +849,7 @@ class Node: self.level = level self.depth = depth self.texts = texts or [] - self.children = [] + self.children = [] def add_child(self, child_node): self.children.append(child_node) @@ -835,7 +895,7 @@ class Node: return self def get_tree(self): - tree_list = [] + tree_list = [] self._dfs(self, tree_list, []) return tree_list @@ -860,7 +920,7 @@ class Node: # A leaf title within depth emits its title path as a chunk (header-only section) elif not child and (1 <= level <= self.depth): tree_list.append("\n".join(path_titles)) - + # Recurse into children with the updated title path for c in child: - self._dfs(c, tree_list, path_titles) \ No newline at end of file + self._dfs(c, tree_list, path_titles) diff --git a/web/src/components/back-button/index.tsx b/web/src/components/back-button/index.tsx index c790d6882..118042128 100644 --- a/web/src/components/back-button/index.tsx +++ b/web/src/components/back-button/index.tsx @@ -29,7 +29,10 @@ const BackButton: React.FC = ({ return ( diff --git a/web/src/components/edit-tag/index.tsx b/web/src/components/edit-tag/index.tsx index 1921853d0..a05fadad6 100644 --- a/web/src/components/edit-tag/index.tsx +++ b/web/src/components/edit-tag/index.tsx @@ -102,8 +102,8 @@ const EditTag = React.forwardRef( {Array.isArray(tagChild) && tagChild.length > 0 && <>{tagChild}} {!inputVisible && ( - - ); - })} - + /> + + ( + + + + + + + )} + /> + + ( + + + {canReference ? ( + + ) : ( + + )} + + + + )} + /> + + + + ); + })} + + ); } diff --git a/web/src/components/originui/input.tsx b/web/src/components/originui/input.tsx index dac582e3a..6ae8c2104 100644 --- a/web/src/components/originui/input.tsx +++ b/web/src/components/originui/input.tsx @@ -32,13 +32,13 @@ const Input = function ({ type={type} data-slot="input" className={cn( - 'border-input file:text-foreground placeholder:text-muted-foreground/70 flex h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-sm shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50', - 'focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]', + 'border-border-button file:text-foreground placeholder:text-text-disabled flex h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-sm shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50', + 'focus-visible:border-border-button focus-visible:ring-text-primary/50 focus-visible:ring-1', 'aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive', type === 'search' && '[&::-webkit-search-cancel-button]:appearance-none [&::-webkit-search-decoration]:appearance-none [&::-webkit-search-results-button]:appearance-none [&::-webkit-search-results-decoration]:appearance-none', type === 'file' && - 'text-muted-foreground/70 file:border-input file:text-foreground p-0 pr-3 italic file:me-3 file:h-full file:border-0 file:border-r file:border-solid file:bg-transparent file:px-3 file:text-sm file:font-medium file:not-italic', + 'text-text-disabled file:border-input file:text-foreground p-0 pr-3 italic file:me-3 file:h-full file:border-0 file:border-r file:border-solid file:bg-transparent file:px-3 file:text-sm file:font-medium file:not-italic', icon && iconPosition === 'left' && 'pl-7', icon && iconPosition === 'right' && 'pr-7', className, diff --git a/web/src/components/originui/password-input.tsx b/web/src/components/originui/password-input.tsx index 891a0dfea..3fb60635c 100644 --- a/web/src/components/originui/password-input.tsx +++ b/web/src/components/originui/password-input.tsx @@ -24,20 +24,6 @@ export default React.forwardRef( ref={ref} {...props} /> - ); diff --git a/web/src/components/originui/select-with-search.tsx b/web/src/components/originui/select-with-search.tsx index 2d095f157..e3cec6240 100644 --- a/web/src/components/originui/select-with-search.tsx +++ b/web/src/components/originui/select-with-search.tsx @@ -140,7 +140,7 @@ export const SelectWithSearch = forwardRef< ref={ref} disabled={disabled} className={cn( - '!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto', + '!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto group', triggerClassName, )} > @@ -155,12 +155,12 @@ export const SelectWithSearch = forwardRef< {value && allowClear && ( <> )} @@ -173,12 +173,17 @@ export const SelectWithSearch = forwardRef< - - - + + {options && options.length > 0 && ( + + )} + {t('common.noDataFound')} {options.map((group, idx) => { if (group.options) { @@ -209,6 +214,7 @@ export const SelectWithSearch = forwardRef< value={group.value} disabled={group.disabled} onSelect={handleSelect} + className="min-h-10" > {group.label} diff --git a/web/src/components/parse-configuration/raptor-form-fields.tsx b/web/src/components/parse-configuration/raptor-form-fields.tsx index a1713ad08..dc089cdb0 100644 --- a/web/src/components/parse-configuration/raptor-form-fields.tsx +++ b/web/src/components/parse-configuration/raptor-form-fields.tsx @@ -221,10 +221,12 @@ const RaptorFormFields = ({ defaultValue={0} type="number" suffix={ - +
+ +
} /> diff --git a/web/src/components/similarity-slider/index.tsx b/web/src/components/similarity-slider/index.tsx index 05a7f29ce..73666b504 100644 --- a/web/src/components/similarity-slider/index.tsx +++ b/web/src/components/similarity-slider/index.tsx @@ -59,6 +59,7 @@ interface SimilaritySliderFormFieldProps { similarityName?: string; vectorSimilarityWeightName?: string; isTooltipShown?: boolean; + numberInputClassName?: string; } export const initialSimilarityThresholdValue = { @@ -86,6 +87,7 @@ export function SimilaritySliderFormField({ similarityName = 'similarity_threshold', vectorSimilarityWeightName = 'vector_similarity_weight', isTooltipShown, + numberInputClassName, }: SimilaritySliderFormFieldProps) { const { t } = useTranslate('knowledgeDetails'); const form = useFormContext(); @@ -101,6 +103,7 @@ export function SimilaritySliderFormField({ step={0.01} layout={FormLayout.Vertical} tooltip={isTooltipShown && t('similarityThresholdTip')} + numberInputClassName={numberInputClassName} > -
+
@@ -158,6 +161,7 @@ export function SimilaritySliderFormField({ className={cn( 'h-6 w-10 p-0 text-center bg-bg-input border-border-default border text-text-secondary', '[appearance:textfield] [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none', + numberInputClassName, )} max={1} min={0} diff --git a/web/src/components/slider-input-form-field.tsx b/web/src/components/slider-input-form-field.tsx index 40b2c6804..0344c4eeb 100644 --- a/web/src/components/slider-input-form-field.tsx +++ b/web/src/components/slider-input-form-field.tsx @@ -25,6 +25,7 @@ type SliderInputFormFieldProps = { tooltip?: ReactNode; defaultValue?: number; className?: string; + numberInputClassName?: string; } & FormLayoutType; export function SliderInputFormField({ @@ -36,6 +37,7 @@ export function SliderInputFormField({ tooltip, defaultValue, className, + numberInputClassName, layout = FormLayout.Horizontal, }: SliderInputFormFieldProps) { const form = useFormContext(); @@ -61,7 +63,7 @@ export function SliderInputFormField({
, React.ComponentPropsWithoutRef >(({ className, ...props }, ref) => ( -
+
e.stopPropagation()} onMouseEnter={(e) => e.currentTarget.focus()} tabIndex={-1} @@ -96,7 +102,7 @@ const CommandGroup = React.forwardRef< = ({ direction = 'horizontal', type = 'horizontal', text, - color = 'border-muted-foreground/50', + color = 'border-border-button', margin = 'my-4', className = '', }) => { diff --git a/web/src/components/ui/input.tsx b/web/src/components/ui/input.tsx index ab72c828b..c8f0044bf 100644 --- a/web/src/components/ui/input.tsx +++ b/web/src/components/ui/input.tsx @@ -2,8 +2,7 @@ import * as React from 'react'; import { cn } from '@/lib/utils'; import { Eye, EyeOff, Search } from 'lucide-react'; -import { useState } from 'react'; -import { Button } from './button'; +import { useEffect, useMemo, useRef, useState } from 'react'; export interface InputProps extends Omit, 'prefix'> { @@ -18,6 +17,20 @@ const Input = React.forwardRef( const { defaultValue, ...restProps } = props; const inputValue = isControlled ? value : defaultValue; const [showPassword, setShowPassword] = useState(false); + const [prefixWidth, setPrefixWidth] = useState(0); + const [suffixWidth, setSuffixWidth] = useState(0); + + const prefixRef = useRef(null); + const suffixRef = useRef(null); + + useEffect(() => { + if (prefixRef.current) { + setPrefixWidth(prefixRef.current.offsetWidth); + } + if (suffixRef.current) { + setSuffixWidth(suffixRef.current.offsetWidth); + } + }, [prefix, suffix, prefixRef, suffixRef]); const handleChange: React.ChangeEventHandler = (e) => { if (type === 'number') { const numValue = e.target.value === '' ? '' : Number(e.target.value); @@ -35,40 +48,60 @@ const Input = React.forwardRef( const isPasswordInput = type === 'password'; - const inputEl = ( - + const inputEl = useMemo( + () => ( + + ), + [ + prefixWidth, + suffixWidth, + isPasswordInput, + inputValue, + className, + handleChange, + restProps, + ], ); if (prefix || suffix || isPasswordInput) { return (
{prefix && ( - + {prefix} )} {inputEl} {suffix && ( ( )} {isPasswordInput && ( - + )}
); diff --git a/web/src/components/ui/modal/modal.tsx b/web/src/components/ui/modal/modal.tsx index af516b1e6..c40e5f90b 100644 --- a/web/src/components/ui/modal/modal.tsx +++ b/web/src/components/ui/modal/modal.tsx @@ -27,7 +27,10 @@ export interface ModalProps { okText?: ReactNode | string; onOk?: () => void; onCancel?: () => void; + okButtonClassName?: string; + cancelButtonClassName?: string; disabled?: boolean; + style?: React.CSSProperties; } export interface ModalType extends FC { show: typeof modalIns.show; @@ -56,7 +59,10 @@ const Modal: ModalType = ({ confirmLoading, cancelText, okText, + okButtonClassName, + cancelButtonClassName, disabled = false, + style, }) => { const sizeClasses = { small: 'max-w-md', @@ -111,7 +117,10 @@ const Modal: ModalType = ({ @@ -122,6 +131,7 @@ const Modal: ModalType = ({ className={cn( 'px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90', { 'cursor-not-allowed': disabled }, + okButtonClassName, )} > {confirmLoading && ( @@ -153,23 +163,26 @@ const Modal: ModalType = ({ handleOk, showfooter, footerClassName, + okButtonClassName, + cancelButtonClassName, ]); return ( maskClosable && onOpenChange?.(false)} > e.stopPropagation()} > {/* title */} {(title || closable) && (
@@ -378,15 +378,17 @@ export const MultiSelect = React.forwardRef< align="start" onEscapeKeyDown={() => setIsPopoverOpen(false)} > - - - + + {options && options.length > 0 && ( + + )} + No results found. - {showSelectAll && ( + {showSelectAll && options && options.length > 0 && ( ))} - -
+
+ {selectedValues.length > 0 && ( <> )} - setIsPopoverOpen(false)} - className="flex-1 justify-center cursor-pointer max-w-full" - > - {t('common.close')} - + {options && options.length > 0 && ( + setIsPopoverOpen(false)} + className="flex-1 justify-center cursor-pointer max-w-full" + > + {t('common.close')} + + )}
diff --git a/web/src/components/ui/select.tsx b/web/src/components/ui/select.tsx index a2edd3263..f88208702 100644 --- a/web/src/components/ui/select.tsx +++ b/web/src/components/ui/select.tsx @@ -59,7 +59,7 @@ const SelectScrollUpButton = React.forwardRef< ( return ( ; + } + + return ( + + ); + } + + return ( + + ); + }, + [form, isDarkTheme], + ); + + return ( +
+ + append({ + [keyField]: '', + [valueField]: '', + [modeField]: InputMode.Constant, + [operatorField]: TypesWithArray.String, + }) + } + > +
+ {fields.map((field, index) => { + const keyFieldAlias = `${name}.${index}.${keyField}`; + const valueFieldAlias = `${name}.${index}.${valueField}`; + const operatorFieldAlias = `${name}.${index}.${operatorField}`; + const modeFieldAlias = `${name}.${index}.${modeField}`; + + return ( +
+
+
+ + + + + + {(field) => ( + { + handleVariableTypeChange( + val, + valueFieldAlias, + modeFieldAlias, + ); + field.onChange(val); + }} + options={VariableTypeOptions} + > + )} + + + + {(field) => ( + { + handleModeChange( + val, + valueFieldAlias, + operatorFieldAlias, + ); + field.onChange(val); + }} + options={InputModeOptions} + > + )} + +
+ + {renderParameter(operatorFieldAlias, modeFieldAlias)} + +
+ + +
+ ); + })} +
+
+ ); +} diff --git a/web/src/pages/agent/form/iteration-form/index.tsx b/web/src/pages/agent/form/iteration-form/index.tsx index 6132b7f5e..c528fa720 100644 --- a/web/src/pages/agent/form/iteration-form/index.tsx +++ b/web/src/pages/agent/form/iteration-form/index.tsx @@ -1,4 +1,3 @@ -import { FormContainer } from '@/components/form-container'; import { Form } from '@/components/ui/form'; import { zodResolver } from '@hookform/resolvers/zod'; import { memo, useMemo } from 'react'; @@ -10,12 +9,21 @@ import { FormWrapper } from '../components/form-wrapper'; import { Output } from '../components/output'; import { QueryVariable } from '../components/query-variable'; import { DynamicOutput } from './dynamic-output'; +import { DynamicVariables } from './dynamic-variables'; import { OutputArray } from './interface'; import { useValues } from './use-values'; import { useWatchFormChange } from './use-watch-form-change'; const FormSchema = z.object({ query: z.string().optional(), + variables: z.array( + z.object({ + variable: z.string().optional(), + operator: z.string().optional(), + parameter: z.string().or(z.number()).or(z.boolean()).optional(), + mode: z.string(), + }), + ), outputs: z.array(z.object({ name: z.string(), value: z.any() })).optional(), }); @@ -41,12 +49,11 @@ function IterationForm({ node }: INextOperatorForm) { return (
- - - + + diff --git a/web/src/pages/agent/form/iteration-form/use-build-logical-options.ts b/web/src/pages/agent/form/iteration-form/use-build-logical-options.ts new file mode 100644 index 000000000..a7f960e98 --- /dev/null +++ b/web/src/pages/agent/form/iteration-form/use-build-logical-options.ts @@ -0,0 +1,59 @@ +import { buildOptions } from '@/utils/form'; +import { camelCase } from 'lodash'; +import { useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; +import { + JsonSchemaDataType, + VariableAssignerLogicalArrayOperator, + VariableAssignerLogicalNumberOperator, + VariableAssignerLogicalNumberOperatorLabelMap, + VariableAssignerLogicalOperator, +} from '../../constant'; + +export function useBuildLogicalOptions() { + const { t } = useTranslation(); + + const buildVariableAssignerLogicalOptions = useCallback( + (record: Record) => { + return buildOptions( + record, + t, + 'flow.variableAssignerLogicalOperatorOptions', + true, + ); + }, + [t], + ); + + const buildLogicalOptions = useCallback( + (type: string) => { + if ( + type?.toLowerCase().startsWith(JsonSchemaDataType.Array.toLowerCase()) + ) { + return buildVariableAssignerLogicalOptions( + VariableAssignerLogicalArrayOperator, + ); + } + + if (type === JsonSchemaDataType.Number) { + return Object.values(VariableAssignerLogicalNumberOperator).map( + (val) => ({ + label: t( + `flow.variableAssignerLogicalOperatorOptions.${camelCase(VariableAssignerLogicalNumberOperatorLabelMap[val as keyof typeof VariableAssignerLogicalNumberOperatorLabelMap] || val)}`, + ), + value: val, + }), + ); + } + + return buildVariableAssignerLogicalOptions( + VariableAssignerLogicalOperator, + ); + }, + [buildVariableAssignerLogicalOptions, t], + ); + + return { + buildLogicalOptions, + }; +} diff --git a/web/src/pages/agent/form/switch-form/index.tsx b/web/src/pages/agent/form/switch-form/index.tsx index f9ccee919..53f4995af 100644 --- a/web/src/pages/agent/form/switch-form/index.tsx +++ b/web/src/pages/agent/form/switch-form/index.tsx @@ -11,16 +11,17 @@ import { import { RAGFlowSelect } from '@/components/ui/select'; import { Separator } from '@/components/ui/separator'; import { Textarea } from '@/components/ui/textarea'; +import { SwitchLogicOperator } from '@/constants/agent'; import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options'; +import { useBuildSwitchLogicOperatorOptions } from '@/hooks/logic-hooks/use-build-options'; import { cn } from '@/lib/utils'; import { zodResolver } from '@hookform/resolvers/zod'; import { t } from 'i18next'; import { X } from 'lucide-react'; -import { memo, useCallback, useMemo } from 'react'; +import { memo, useCallback } from 'react'; import { useFieldArray, useForm, useFormContext } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { z } from 'zod'; -import { SwitchLogicOperatorOptions } from '../../constant'; import { IOperatorForm } from '../../interface'; import { FormWrapper } from '../components/form-wrapper'; import { QueryVariable } from '../components/query-variable'; @@ -185,12 +186,7 @@ function SwitchForm({ node }: IOperatorForm) { control: form.control, }); - const switchLogicOperatorOptions = useMemo(() => { - return SwitchLogicOperatorOptions.map((x) => ({ - value: x, - label: t(`flow.switchLogicOperatorOptions.${x}`), - })); - }, [t]); + const switchLogicOperatorOptions = useBuildSwitchLogicOperatorOptions(); useWatchFormChange(node?.id, form); @@ -253,7 +249,7 @@ function SwitchForm({ node }: IOperatorForm) { append({ - logical_operator: SwitchLogicOperatorOptions[0], + logical_operator: SwitchLogicOperator.And, [ItemKey]: [ { operator: switchOperatorOptions[0].value, diff --git a/web/src/pages/agent/gobal-variable-sheet/constant.ts b/web/src/pages/agent/gobal-variable-sheet/constant.ts index 72a7d6342..8470ffa86 100644 --- a/web/src/pages/agent/gobal-variable-sheet/constant.ts +++ b/web/src/pages/agent/gobal-variable-sheet/constant.ts @@ -1,7 +1,7 @@ import { FormFieldConfig, FormFieldType } from '@/components/dynamic-form'; -import { buildSelectOptions } from '@/utils/component-util'; import { t } from 'i18next'; import { TypesWithArray } from '../constant'; +import { buildConversationVariableSelectOptions } from '../utils'; export { TypesWithArray } from '../constant'; // const TypesWithoutArray = Object.values(JsonSchemaDataType).filter( // (item) => item !== JsonSchemaDataType.Array, @@ -18,7 +18,7 @@ export const GlobalFormFields = [ placeholder: t('common.namePlaceholder'), required: true, validation: { - pattern: /^[a-zA-Z_]+$/, + pattern: /^[a-zA-Z_0-9]+$/, message: t('flow.variableNameMessage'), }, type: FormFieldType.Text, @@ -29,7 +29,7 @@ export const GlobalFormFields = [ placeholder: '', required: true, type: FormFieldType.Select, - options: buildSelectOptions(Object.values(TypesWithArray)), + options: buildConversationVariableSelectOptions(), }, { label: t('flow.defaultValue'), diff --git a/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx index 7e60a7aec..c41e766f2 100644 --- a/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx +++ b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx @@ -3,6 +3,7 @@ import { BlockButton, Button } from '@/components/ui/button'; import { Input } from '@/components/ui/input'; import { Segmented } from '@/components/ui/segmented'; import { t } from 'i18next'; +import { isEmpty } from 'lodash'; import { Trash2, X } from 'lucide-react'; import { useCallback } from 'react'; import { FieldValues } from 'react-hook-form'; @@ -36,14 +37,19 @@ export const useObjectFields = () => { path: (string | number)[] = [], ): Array<{ path: (string | number)[]; message: string }> => { const errors: Array<{ path: (string | number)[]; message: string }> = []; - - if (obj !== null && typeof obj === 'object' && !Array.isArray(obj)) { + if (typeof obj === 'object' && !Array.isArray(obj)) { + if (isEmpty(obj)) { + errors.push({ + path: [...path], + message: 'No empty parameters are allowed.', + }); + } for (const key in obj) { if (obj.hasOwnProperty(key)) { - if (!/^[a-zA-Z_]+$/.test(key)) { + if (!/^[a-zA-Z_0-9]+$/.test(key)) { errors.push({ path: [...path, key], - message: `Key "${key}" is invalid. Keys can only contain letters and underscores.`, + message: `Key "${key}" is invalid. Keys can only contain letters and underscores and numbers.`, }); } const nestedErrors = validateKeys(obj[key], [...path, key]); @@ -108,6 +114,21 @@ export const useObjectFields = () => { } }, []); + const arrayObjectValidate = useCallback((value: any) => { + try { + if (validateKeys(value, [])?.length > 0) { + throw new Error(t('flow.formatTypeError')); + } + if (value && typeof value === 'string' && !JSON.parse(value)) { + throw new Error(t('flow.formatTypeError')); + } + return true; + } catch (e) { + console.log('object-render-error', e, value); + throw new Error(t('flow.formatTypeError')); + } + }, []); + const arrayStringRender = useCallback((field: FieldValues, type = 'text') => { const values = Array.isArray(field.value) ? field.value @@ -253,8 +274,9 @@ export const useObjectFields = () => { const handleCustomValidate = (value: TypesWithArray) => { switch (value) { case TypesWithArray.Object: - case TypesWithArray.ArrayObject: return objectValidate; + case TypesWithArray.ArrayObject: + return arrayObjectValidate; case TypesWithArray.ArrayString: return arrayStringValidate; case TypesWithArray.ArrayNumber: @@ -284,6 +306,7 @@ export const useObjectFields = () => { return { objectRender, objectValidate, + arrayObjectValidate, arrayStringRender, arrayStringValidate, arrayNumberRender, diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts index f4e4a4b1d..6ae2935b4 100644 --- a/web/src/pages/agent/utils.ts +++ b/web/src/pages/agent/utils.ts @@ -7,6 +7,7 @@ import { ICategorizeItemResult, } from '@/interfaces/database/agent'; import { DSLComponents, RAGFlowNodeType } from '@/interfaces/database/flow'; +import { buildSelectOptions } from '@/utils/component-util'; import { removeUselessFieldsFromValues } from '@/utils/form'; import { Edge, Node, XYPosition } from '@xyflow/react'; import { FormInstance, FormListFieldData } from 'antd'; @@ -30,6 +31,7 @@ import { NoDebugOperatorsList, NodeHandleId, Operator, + TypesWithArray, } from './constant'; import { DataOperationsFormSchemaType } from './form/data-operations-form'; import { ExtractorFormSchemaType } from './form/extractor-form'; @@ -766,3 +768,7 @@ export function buildBeginQueryWithObject( export function getArrayElementType(type: string) { return typeof type === 'string' ? type.match(/<([^>]+)>/)?.at(1) ?? '' : ''; } + +export function buildConversationVariableSelectOptions() { + return buildSelectOptions(Object.values(TypesWithArray)); +} diff --git a/web/src/pages/dataset/sidebar/index.tsx b/web/src/pages/dataset/sidebar/index.tsx index d48019e51..f921d66ba 100644 --- a/web/src/pages/dataset/sidebar/index.tsx +++ b/web/src/pages/dataset/sidebar/index.tsx @@ -92,7 +92,7 @@ export function SideBar({ refreshCount }: PropType) { key={itemIdx} variant={active ? 'secondary' : 'ghost'} className={cn( - 'w-full justify-start gap-2.5 px-3 relative h-10 text-text-sub-title-invert', + 'w-full justify-start gap-2.5 px-3 relative h-10 text-text-secondary', { 'bg-bg-card': active, 'text-text-primary': active, diff --git a/web/src/pages/next-search/search-setting-aisummery-config.tsx b/web/src/pages/next-search/search-setting-aisummery-config.tsx deleted file mode 100644 index 8764c3014..000000000 --- a/web/src/pages/next-search/search-setting-aisummery-config.tsx +++ /dev/null @@ -1,236 +0,0 @@ -import { SliderInputSwitchFormField } from '@/components/llm-setting-items/slider'; -import { SelectWithSearch } from '@/components/originui/select-with-search'; -import { - FormControl, - FormField, - FormItem, - FormLabel, - FormMessage, -} from '@/components/ui/form'; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from '@/components/ui/select'; -import { - LlmModelType, - ModelVariableType, - settledModelVariableMap, -} from '@/constants/knowledge'; -import { useTranslate } from '@/hooks/common-hooks'; -import { useComposeLlmOptionsByModelTypes } from '@/hooks/llm-hooks'; -import { camelCase, isEqual } from 'lodash'; -import { useCallback } from 'react'; -import { useFormContext } from 'react-hook-form'; -import { z } from 'zod'; - -interface LlmSettingFieldItemsProps { - prefix?: string; - options?: any[]; -} -const LlmSettingEnableSchema = { - temperatureEnabled: z.boolean(), - topPEnabled: z.boolean(), - presencePenaltyEnabled: z.boolean(), - frequencyPenaltyEnabled: z.boolean(), -}; -export const LlmSettingSchema = { - llm_id: z.string(), - parameter: z.string().optional(), - temperature: z.coerce.number().optional(), - top_p: z.coerce.number().optional(), - presence_penalty: z.coerce.number().optional(), - frequency_penalty: z.coerce.number().optional(), - ...LlmSettingEnableSchema, - // maxTokensEnabled: z.boolean(), -}; - -export function LlmSettingFieldItems({ - prefix, - options, -}: LlmSettingFieldItemsProps) { - const form = useFormContext(); - const { t } = useTranslate('chat'); - - const modelOptions = useComposeLlmOptionsByModelTypes([ - LlmModelType.Chat, - LlmModelType.Image2text, - ]); - - const handleChange = useCallback( - (parameter: string) => { - const values = - settledModelVariableMap[ - parameter as keyof typeof settledModelVariableMap - ]; - const enabledKeys = Object.keys(LlmSettingEnableSchema); - - for (const key in values) { - if (Object.prototype.hasOwnProperty.call(values, key)) { - const element = values[key as keyof typeof values]; - form.setValue(`${prefix}.${key}`, element); - } - } - if (enabledKeys && enabledKeys.length) { - for (const key of enabledKeys) { - form.setValue(`${prefix}.${key}`, true); - } - } - }, - [form, prefix], - ); - - const parameterOptions = Object.values(ModelVariableType).map((x) => ({ - label: t(camelCase(x)), - value: x, - })) as unknown as { label: string; value: ModelVariableType | 'Custom' }[]; - parameterOptions.push({ - label: t(camelCase('Custom')), - value: 'Custom', - }); - - const getFieldWithPrefix = useCallback( - (name: string) => { - return prefix ? `${prefix}.${name}` : name; - }, - [prefix], - ); - - const checkParameterIsEquel = () => { - const [ - parameter, - topPValue, - frequencyPenaltyValue, - temperatureValue, - presencePenaltyValue, - ] = form.getValues([ - getFieldWithPrefix('parameter'), - getFieldWithPrefix('temperature'), - getFieldWithPrefix('top_p'), - getFieldWithPrefix('frequency_penalty'), - getFieldWithPrefix('presence_penalty'), - ]); - if (parameter && parameter !== 'Custom') { - const parameterValue = - settledModelVariableMap[parameter as keyof typeof ModelVariableType]; - const parameterRealValue = { - top_p: topPValue, - temperature: temperatureValue, - frequency_penalty: frequencyPenaltyValue, - presence_penalty: presencePenaltyValue, - }; - if (!isEqual(parameterValue, parameterRealValue)) { - form.setValue(getFieldWithPrefix('parameter'), 'Custom'); - } - } - }; - - return ( -
- ( - - - * - {t('model')} - - - - - - - )} - /> - ( - - {t('freedom')} - -
- -
-
- -
- )} - /> - { - checkParameterIsEquel(); - }} - > - { - checkParameterIsEquel(); - }} - > - { - checkParameterIsEquel(); - }} - > - { - checkParameterIsEquel(); - }} - > - {/* */} -
- ); -} diff --git a/web/src/pages/next-search/search-setting.tsx b/web/src/pages/next-search/search-setting.tsx index 665d7e49b..365c366cb 100644 --- a/web/src/pages/next-search/search-setting.tsx +++ b/web/src/pages/next-search/search-setting.tsx @@ -1,6 +1,10 @@ // src/pages/next-search/search-setting.tsx import { AvatarUpload } from '@/components/avatar-upload'; +import { + LlmSettingFieldItems, + LlmSettingSchema, +} from '@/components/llm-setting-items/next'; import { MetadataFilter, MetadataFilterSchema, @@ -46,10 +50,10 @@ import { IllmSettingProps, useUpdateSearch, } from '../next-searches/hooks'; -import { - LlmSettingFieldItems, - LlmSettingSchema, -} from './search-setting-aisummery-config'; +// import { +// LlmSettingFieldItems, +// LlmSettingSchema, +// } from './search-setting-aisummery-config'; interface SearchSettingProps { open: boolean; @@ -397,6 +401,7 @@ const SearchSetting: React.FC = ({ isTooltipShown similarityName="search_config.similarity_threshold" vectorSimilarityWeightName="search_config.vector_similarity_weight" + numberInputClassName="rounded-sm" > {/* Rerank Model */} = ({ = ({ )} /> {aiSummaryDisabled && ( + // )} {/* Feature Controls */} diff --git a/web/src/pages/user-setting/data-source/add-datasource-modal.tsx b/web/src/pages/user-setting/data-source/add-datasource-modal.tsx index 8ac94d857..cf0a7ae44 100644 --- a/web/src/pages/user-setting/data-source/add-datasource-modal.tsx +++ b/web/src/pages/user-setting/data-source/add-datasource-modal.tsx @@ -62,6 +62,7 @@ const AddDataSourceModal = ({ sourceData?.id as keyof typeof DataSourceFormDefaultValues ] as FieldValues } + labelClassName="font-normal" >
React.ReactNode; - horizontal?: boolean; - onChange?: (value: any) => void; -} - -// Component props interface -interface DynamicFormProps { - fields: FormFieldConfig[]; - onSubmit: SubmitHandler; - className?: string; - children?: React.ReactNode; - defaultValues?: DefaultValues; -} - -// Form ref interface -export interface DynamicFormRef { - submit: () => void; - getValues: () => any; - reset: (values?: any) => void; -} - -// Generate Zod validation schema based on field configurations -const generateSchema = (fields: FormFieldConfig[]): ZodSchema => { - const schema: Record = {}; - const nestedSchemas: Record> = {}; - - fields.forEach((field) => { - let fieldSchema: ZodSchema; - - // Create base validation schema based on field type - switch (field.type) { - case FormFieldType.Email: - fieldSchema = z.string().email('Please enter a valid email address'); - break; - case FormFieldType.Number: - fieldSchema = z.coerce.number(); - if (field.validation?.min !== undefined) { - fieldSchema = (fieldSchema as z.ZodNumber).min( - field.validation.min, - field.validation.message || - `Value cannot be less than ${field.validation.min}`, - ); - } - if (field.validation?.max !== undefined) { - fieldSchema = (fieldSchema as z.ZodNumber).max( - field.validation.max, - field.validation.message || - `Value cannot be greater than ${field.validation.max}`, - ); - } - break; - case FormFieldType.Checkbox: - fieldSchema = z.boolean(); - break; - case FormFieldType.Tag: - fieldSchema = z.array(z.string()); - break; - default: - fieldSchema = z.string(); - break; - } - - // Handle required fields - if (field.required) { - if (field.type === FormFieldType.Checkbox) { - fieldSchema = (fieldSchema as z.ZodBoolean).refine( - (val) => val === true, - { - message: `${field.label} is required`, - }, - ); - } else if (field.type === FormFieldType.Tag) { - fieldSchema = (fieldSchema as z.ZodArray).min(1, { - message: `${field.label} is required`, - }); - } else { - fieldSchema = (fieldSchema as z.ZodString).min(1, { - message: `${field.label} is required`, - }); - } - } - - if (!field.required) { - fieldSchema = fieldSchema.optional(); - } - - // Handle other validation rules - if ( - field.type !== FormFieldType.Number && - field.type !== FormFieldType.Checkbox && - field.type !== FormFieldType.Tag && - field.required - ) { - fieldSchema = fieldSchema as z.ZodString; - - if (field.validation?.minLength !== undefined) { - fieldSchema = (fieldSchema as z.ZodString).min( - field.validation.minLength, - field.validation.message || - `Enter at least ${field.validation.minLength} characters`, - ); - } - - if (field.validation?.maxLength !== undefined) { - fieldSchema = (fieldSchema as z.ZodString).max( - field.validation.maxLength, - field.validation.message || - `Enter up to ${field.validation.maxLength} characters`, - ); - } - - if (field.validation?.pattern) { - fieldSchema = (fieldSchema as z.ZodString).regex( - field.validation.pattern, - field.validation.message || 'Invalid input format', - ); - } - } - - if (field.name.includes('.')) { - const keys = field.name.split('.'); - const firstKey = keys[0]; - - if (!nestedSchemas[firstKey]) { - nestedSchemas[firstKey] = {}; - } - - let currentSchema = nestedSchemas[firstKey]; - for (let i = 1; i < keys.length - 1; i++) { - const key = keys[i]; - if (!currentSchema[key]) { - currentSchema[key] = {}; - } - currentSchema = currentSchema[key]; - } - - const lastKey = keys[keys.length - 1]; - currentSchema[lastKey] = fieldSchema; - } else { - schema[field.name] = fieldSchema; - } - }); - - Object.keys(nestedSchemas).forEach((key) => { - const buildNestedSchema = (obj: Record): ZodSchema => { - const nestedSchema: Record = {}; - Object.keys(obj).forEach((subKey) => { - if ( - typeof obj[subKey] === 'object' && - !(obj[subKey] instanceof z.ZodType) - ) { - nestedSchema[subKey] = buildNestedSchema(obj[subKey]); - } else { - nestedSchema[subKey] = obj[subKey]; - } - }); - return z.object(nestedSchema); - }; - - schema[key] = buildNestedSchema(nestedSchemas[key]); - }); - return z.object(schema); -}; - -// Generate default values based on field configurations -const generateDefaultValues = ( - fields: FormFieldConfig[], -): DefaultValues => { - const defaultValues: Record = {}; - - fields.forEach((field) => { - if (field.name.includes('.')) { - const keys = field.name.split('.'); - let current = defaultValues; - - for (let i = 0; i < keys.length - 1; i++) { - const key = keys[i]; - if (!current[key]) { - current[key] = {}; - } - current = current[key]; - } - - const lastKey = keys[keys.length - 1]; - if (field.defaultValue !== undefined) { - current[lastKey] = field.defaultValue; - } else if (field.type === FormFieldType.Checkbox) { - current[lastKey] = false; - } else if (field.type === FormFieldType.Tag) { - current[lastKey] = []; - } else { - current[lastKey] = ''; - } - } else { - if (field.defaultValue !== undefined) { - defaultValues[field.name] = field.defaultValue; - } else if (field.type === FormFieldType.Checkbox) { - defaultValues[field.name] = false; - } else if (field.type === FormFieldType.Tag) { - defaultValues[field.name] = []; - } else { - defaultValues[field.name] = ''; - } - } - }); - - return defaultValues as DefaultValues; -}; - -// Dynamic form component -const DynamicForm = { - Root: forwardRef( - ( - { - fields, - onSubmit, - className = '', - children, - defaultValues: formDefaultValues = {} as DefaultValues, - }: DynamicFormProps, - ref: React.Ref, - ) => { - // Generate validation schema and default values - const schema = useMemo(() => generateSchema(fields), [fields]); - - const defaultValues = useMemo(() => { - const value = { - ...generateDefaultValues(fields), - ...formDefaultValues, - }; - console.log('generateDefaultValues', fields, value); - return value; - }, [fields, formDefaultValues]); - - // Initialize form - const form = useForm({ - resolver: zodResolver(schema), - defaultValues, - }); - - // Expose form methods via ref - useImperativeHandle(ref, () => ({ - submit: () => form.handleSubmit(onSubmit)(), - getValues: () => form.getValues(), - reset: (values?: T) => { - if (values) { - form.reset(values); - } else { - form.reset(); - } - }, - setError: form.setError, - clearErrors: form.clearErrors, - trigger: form.trigger, - })); - - useEffect(() => { - if (formDefaultValues && Object.keys(formDefaultValues).length > 0) { - form.reset({ - ...generateDefaultValues(fields), - ...formDefaultValues, - }); - } - }, [form, formDefaultValues, fields]); - - // Submit handler - // const handleSubmit = form.handleSubmit(onSubmit); - - // Render form fields - const renderField = (field: FormFieldConfig) => { - if (field.render) { - return ( - - {(fieldProps) => { - const finalFieldProps = field.onChange - ? { - ...fieldProps, - onChange: (e: any) => { - fieldProps.onChange(e); - field.onChange?.(e.target?.value ?? e); - }, - } - : fieldProps; - return field.render?.(finalFieldProps); - }} - - ); - } - switch (field.type) { - case FormFieldType.Textarea: - return ( - - {(fieldProps) => { - const finalFieldProps = field.onChange - ? { - ...fieldProps, - onChange: (e: any) => { - fieldProps.onChange(e); - field.onChange?.(e.target.value); - }, - } - : fieldProps; - return ( -