Merge branch 'main' of github.com:infiniflow/ragflow into feature/1117
This commit is contained in:
commit
906586079c
55 changed files with 7025 additions and 4046 deletions
|
|
@ -192,7 +192,8 @@ releases! 🌟
|
|||
```bash
|
||||
$ cd ragflow/docker
|
||||
|
||||
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases), e.g.: git checkout v0.22.1
|
||||
# git checkout v0.22.1
|
||||
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases)
|
||||
# This steps ensures the **entrypoint.sh** file in the code matches the Docker image version.
|
||||
|
||||
# Use CPU for DeepDoc tasks:
|
||||
|
|
|
|||
|
|
@ -192,7 +192,8 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
|
|||
```bash
|
||||
$ cd ragflow/docker
|
||||
|
||||
# Opsional: gunakan tag stabil (lihat releases: https://github.com/infiniflow/ragflow/releases), contoh: git checkout v0.22.1
|
||||
# git checkout v0.22.1
|
||||
# Opsional: gunakan tag stabil (lihat releases: https://github.com/infiniflow/ragflow/releases)
|
||||
# This steps ensures the **entrypoint.sh** file in the code matches the Docker image version.
|
||||
|
||||
# Use CPU for DeepDoc tasks:
|
||||
|
|
|
|||
|
|
@ -172,7 +172,8 @@
|
|||
```bash
|
||||
$ cd ragflow/docker
|
||||
|
||||
# 任意: 安定版タグを利用 (一覧: https://github.com/infiniflow/ragflow/releases) 例: git checkout v0.22.1
|
||||
# git checkout v0.22.1
|
||||
# 任意: 安定版タグを利用 (一覧: https://github.com/infiniflow/ragflow/releases)
|
||||
# この手順は、コード内の entrypoint.sh ファイルが Docker イメージのバージョンと一致していることを確認します。
|
||||
|
||||
# Use CPU for DeepDoc tasks:
|
||||
|
|
|
|||
|
|
@ -174,7 +174,8 @@
|
|||
```bash
|
||||
$ cd ragflow/docker
|
||||
|
||||
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases), e.g.: git checkout v0.22.1
|
||||
# git checkout v0.22.1
|
||||
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases)
|
||||
# 이 단계는 코드의 entrypoint.sh 파일이 Docker 이미지 버전과 일치하도록 보장합니다.
|
||||
|
||||
# Use CPU for DeepDoc tasks:
|
||||
|
|
|
|||
|
|
@ -192,7 +192,8 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
|
|||
```bash
|
||||
$ cd ragflow/docker
|
||||
|
||||
# Opcional: use uma tag estável (veja releases: https://github.com/infiniflow/ragflow/releases), ex.: git checkout v0.22.1
|
||||
# git checkout v0.22.1
|
||||
# Opcional: use uma tag estável (veja releases: https://github.com/infiniflow/ragflow/releases)
|
||||
# Esta etapa garante que o arquivo entrypoint.sh no código corresponda à versão da imagem do Docker.
|
||||
|
||||
# Use CPU for DeepDoc tasks:
|
||||
|
|
|
|||
|
|
@ -191,7 +191,8 @@
|
|||
```bash
|
||||
$ cd ragflow/docker
|
||||
|
||||
# 可選:使用穩定版標籤(查看發佈:https://github.com/infiniflow/ragflow/releases),例:git checkout v0.22.1
|
||||
# git checkout v0.22.1
|
||||
# 可選:使用穩定版標籤(查看發佈:https://github.com/infiniflow/ragflow/releases)
|
||||
# 此步驟確保程式碼中的 entrypoint.sh 檔案與 Docker 映像版本一致。
|
||||
|
||||
# Use CPU for DeepDoc tasks:
|
||||
|
|
|
|||
|
|
@ -192,7 +192,8 @@
|
|||
```bash
|
||||
$ cd ragflow/docker
|
||||
|
||||
# 可选:使用稳定版本标签(查看发布:https://github.com/infiniflow/ragflow/releases),例如:git checkout v0.22.1
|
||||
# git checkout v0.22.1
|
||||
# 可选:使用稳定版本标签(查看发布:https://github.com/infiniflow/ragflow/releases)
|
||||
# 这一步确保代码中的 entrypoint.sh 文件与 Docker 镜像的版本保持一致。
|
||||
|
||||
# Use CPU for DeepDoc tasks:
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ from common.constants import ActiveEnum, StatusEnum
|
|||
from api.utils.crypt import decrypt
|
||||
from common.misc_utils import get_uuid
|
||||
from common.time_utils import current_timestamp, datetime_format, get_format_time
|
||||
from common.connection_utils import construct_response
|
||||
from common.connection_utils import sync_construct_response
|
||||
from common import settings
|
||||
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ def login_admin(email: str, password: str):
|
|||
user.last_login_time = get_format_time()
|
||||
user.save()
|
||||
msg = "Welcome back!"
|
||||
return construct_response(data=resp, auth=user.get_id(), message=msg)
|
||||
return sync_construct_response(data=resp, auth=user.get_id(), message=msg)
|
||||
|
||||
|
||||
def check_admin(username: str, password: str):
|
||||
|
|
|
|||
|
|
@ -886,6 +886,7 @@ async def check_embedding():
|
|||
|
||||
try:
|
||||
v, _ = emb_mdl.encode([title, txt_in])
|
||||
assert len(v[1]) == len(ck["vector"]), f"The dimension ({len(v[1])}) of given embedding model is different from the original ({len(ck['vector'])})"
|
||||
sim_content = _cos_sim(v[1], ck["vector"])
|
||||
title_w = 0.1
|
||||
qv_mix = title_w * v[0] + (1 - title_w) * v[1]
|
||||
|
|
@ -895,8 +896,8 @@ async def check_embedding():
|
|||
if sim_mix > sim:
|
||||
sim = sim_mix
|
||||
mode = "title+content"
|
||||
except Exception:
|
||||
return get_error_data_result(message="embedding failure")
|
||||
except Exception as e:
|
||||
return get_error_data_result(message=f"Embedding failure. {e}")
|
||||
|
||||
eff_sims.append(sim)
|
||||
results.append({
|
||||
|
|
|
|||
|
|
@ -223,6 +223,10 @@ def completion(tenant_id, agent_id, session_id=None, **kwargs):
|
|||
ans["session_id"] = session_id
|
||||
if ans["event"] == "message":
|
||||
txt += ans["data"]["content"]
|
||||
if ans["data"].get("start_to_think", False):
|
||||
txt += "<think>"
|
||||
elif ans["data"].get("end_to_think", False):
|
||||
txt += "</think>"
|
||||
yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"
|
||||
|
||||
conv.message.append({"role": "assistant", "content": txt, "created_at": time.time(), "id": message_id})
|
||||
|
|
|
|||
|
|
@ -120,3 +120,23 @@ async def construct_response(code=RetCode.SUCCESS, message="success", data=None,
|
|||
response.headers["Access-Control-Allow-Headers"] = "*"
|
||||
response.headers["Access-Control-Expose-Headers"] = "Authorization"
|
||||
return response
|
||||
|
||||
|
||||
def sync_construct_response(code=RetCode.SUCCESS, message="success", data=None, auth=None):
|
||||
import flask
|
||||
result_dict = {"code": code, "message": message, "data": data}
|
||||
response_dict = {}
|
||||
for key, value in result_dict.items():
|
||||
if value is None and key != "code":
|
||||
continue
|
||||
else:
|
||||
response_dict[key] = value
|
||||
response = flask.make_response(flask.jsonify(response_dict))
|
||||
if auth:
|
||||
response.headers["Authorization"] = auth
|
||||
response.headers["Access-Control-Allow-Origin"] = "*"
|
||||
response.headers["Access-Control-Allow-Method"] = "*"
|
||||
response.headers["Access-Control-Allow-Headers"] = "*"
|
||||
response.headers["Access-Control-Allow-Headers"] = "*"
|
||||
response.headers["Access-Control-Expose-Headers"] = "Authorization"
|
||||
return response
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ from common.constants import SVR_QUEUE_NAME, Storage
|
|||
import rag.utils
|
||||
import rag.utils.es_conn
|
||||
import rag.utils.infinity_conn
|
||||
import rag.utils.ob_conn
|
||||
import rag.utils.opensearch_conn
|
||||
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
|
||||
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
|
||||
|
|
@ -103,6 +104,7 @@ INFINITY = {}
|
|||
AZURE = {}
|
||||
S3 = {}
|
||||
MINIO = {}
|
||||
OB = {}
|
||||
OSS = {}
|
||||
OS = {}
|
||||
|
||||
|
|
@ -227,7 +229,7 @@ def init_settings():
|
|||
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
|
||||
OAUTH_CONFIG = get_base_config("oauth", {})
|
||||
|
||||
global DOC_ENGINE, docStoreConn, ES, OS, INFINITY
|
||||
global DOC_ENGINE, docStoreConn, ES, OB, OS, INFINITY
|
||||
DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
|
||||
# DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
|
||||
lower_case_doc_engine = DOC_ENGINE.lower()
|
||||
|
|
@ -240,6 +242,9 @@ def init_settings():
|
|||
elif lower_case_doc_engine == "opensearch":
|
||||
OS = get_base_config("os", {})
|
||||
docStoreConn = rag.utils.opensearch_conn.OSConnection()
|
||||
elif lower_case_doc_engine == "oceanbase":
|
||||
OB = get_base_config("oceanbase", {})
|
||||
docStoreConn = rag.utils.ob_conn.OBConnection()
|
||||
else:
|
||||
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,12 @@ def num_tokens_from_string(string: str) -> int:
|
|||
return 0
|
||||
|
||||
def total_token_count_from_response(resp):
|
||||
"""
|
||||
Extract token count from LLM response in various formats.
|
||||
|
||||
Handles None responses and different response structures from various LLM providers.
|
||||
Returns 0 if token count cannot be determined.
|
||||
"""
|
||||
if resp is None:
|
||||
return 0
|
||||
|
||||
|
|
@ -50,19 +56,19 @@ def total_token_count_from_response(resp):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
if 'usage' in resp and 'total_tokens' in resp['usage']:
|
||||
if isinstance(resp, dict) and 'usage' in resp and 'total_tokens' in resp['usage']:
|
||||
try:
|
||||
return resp["usage"]["total_tokens"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if 'usage' in resp and 'input_tokens' in resp['usage'] and 'output_tokens' in resp['usage']:
|
||||
if isinstance(resp, dict) and 'usage' in resp and 'input_tokens' in resp['usage'] and 'output_tokens' in resp['usage']:
|
||||
try:
|
||||
return resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if 'meta' in resp and 'tokens' in resp['meta'] and 'input_tokens' in resp['meta']['tokens'] and 'output_tokens' in resp['meta']['tokens']:
|
||||
if isinstance(resp, dict) and 'meta' in resp and 'tokens' in resp['meta'] and 'input_tokens' in resp['meta']['tokens'] and 'output_tokens' in resp['meta']['tokens']:
|
||||
try:
|
||||
return resp["meta"]["tokens"]["input_tokens"] + resp["meta"]["tokens"]["output_tokens"]
|
||||
except Exception:
|
||||
|
|
|
|||
|
|
@ -4848,7 +4848,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"name": "JieKou.AI",
|
||||
"name": "Jiekou.AI",
|
||||
"logo": "",
|
||||
"tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK",
|
||||
"status": "1",
|
||||
|
|
|
|||
|
|
@ -28,6 +28,14 @@ os:
|
|||
infinity:
|
||||
uri: 'localhost:23817'
|
||||
db_name: 'default_db'
|
||||
oceanbase:
|
||||
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
|
||||
config:
|
||||
db_name: 'test'
|
||||
user: 'root@ragflow'
|
||||
password: 'infini_rag_flow'
|
||||
host: 'localhost'
|
||||
port: 2881
|
||||
redis:
|
||||
db: 1
|
||||
password: 'infini_rag_flow'
|
||||
|
|
@ -139,5 +147,3 @@ user_default_llm:
|
|||
# secret_id: 'tencent_secret_id'
|
||||
# secret_key: 'tencent_secret_key'
|
||||
# region: 'tencent_region'
|
||||
# table_result_type: '1'
|
||||
# markdown_image_response_type: '1'
|
||||
|
|
|
|||
|
|
@ -192,12 +192,16 @@ class TencentCloudAPIClient:
|
|||
|
||||
|
||||
class TCADPParser(RAGFlowPdfParser):
|
||||
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou"):
|
||||
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou",
|
||||
table_result_type: str = None, markdown_image_response_type: str = None):
|
||||
super().__init__()
|
||||
|
||||
# First initialize logger
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
# Log received parameters
|
||||
self.logger.info(f"[TCADP] Initializing with parameters - table_result_type: {table_result_type}, markdown_image_response_type: {markdown_image_response_type}")
|
||||
|
||||
# Priority: read configuration from RAGFlow configuration system (service_conf.yaml)
|
||||
try:
|
||||
tcadp_parser = get_base_config("tcadp_config", {})
|
||||
|
|
@ -205,14 +209,30 @@ class TCADPParser(RAGFlowPdfParser):
|
|||
self.secret_id = secret_id or tcadp_parser.get("secret_id")
|
||||
self.secret_key = secret_key or tcadp_parser.get("secret_key")
|
||||
self.region = region or tcadp_parser.get("region", "ap-guangzhou")
|
||||
self.table_result_type = tcadp_parser.get("table_result_type", "1")
|
||||
self.markdown_image_response_type = tcadp_parser.get("markdown_image_response_type", "1")
|
||||
self.logger.info("[TCADP] Configuration read from service_conf.yaml")
|
||||
# Set table_result_type and markdown_image_response_type from config or parameters
|
||||
self.table_result_type = table_result_type if table_result_type is not None else tcadp_parser.get("table_result_type", "1")
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else tcadp_parser.get("markdown_image_response_type", "1")
|
||||
|
||||
else:
|
||||
self.logger.error("[TCADP] Please configure tcadp_config in service_conf.yaml first")
|
||||
# If config file is empty, use provided parameters or defaults
|
||||
self.secret_id = secret_id
|
||||
self.secret_key = secret_key
|
||||
self.region = region or "ap-guangzhou"
|
||||
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||
|
||||
except ImportError:
|
||||
self.logger.info("[TCADP] Configuration module import failed")
|
||||
# If config file is not available, use provided parameters or defaults
|
||||
self.secret_id = secret_id
|
||||
self.secret_key = secret_key
|
||||
self.region = region or "ap-guangzhou"
|
||||
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||
|
||||
# Log final values
|
||||
self.logger.info(f"[TCADP] Final values - table_result_type: {self.table_result_type}, markdown_image_response_type: {self.markdown_image_response_type}")
|
||||
|
||||
if not self.secret_id or not self.secret_key:
|
||||
raise ValueError("[TCADP] Please set Tencent Cloud API keys, configure tcadp_config in service_conf.yaml")
|
||||
|
|
@ -400,6 +420,8 @@ class TCADPParser(RAGFlowPdfParser):
|
|||
"TableResultType": self.table_result_type,
|
||||
"MarkdownImageResponseType": self.markdown_image_response_type
|
||||
}
|
||||
|
||||
self.logger.info(f"[TCADP] API request config - TableResultType: {self.table_result_type}, MarkdownImageResponseType: {self.markdown_image_response_type}")
|
||||
|
||||
result = client.reconstruct_document_sse(
|
||||
file_type=file_type,
|
||||
|
|
|
|||
22
docker/.env
22
docker/.env
|
|
@ -7,6 +7,7 @@
|
|||
# Available options:
|
||||
# - `elasticsearch` (default)
|
||||
# - `infinity` (https://github.com/infiniflow/infinity)
|
||||
# - `oceanbase` (https://github.com/oceanbase/oceanbase)
|
||||
# - `opensearch` (https://github.com/opensearch-project/OpenSearch)
|
||||
DOC_ENGINE=${DOC_ENGINE:-elasticsearch}
|
||||
|
||||
|
|
@ -62,6 +63,27 @@ INFINITY_THRIFT_PORT=23817
|
|||
INFINITY_HTTP_PORT=23820
|
||||
INFINITY_PSQL_PORT=5432
|
||||
|
||||
# The hostname where the OceanBase service is exposed
|
||||
OCEANBASE_HOST=oceanbase
|
||||
# The port used to expose the OceanBase service
|
||||
OCEANBASE_PORT=2881
|
||||
# The username for OceanBase
|
||||
OCEANBASE_USER=root@ragflow
|
||||
# The password for OceanBase
|
||||
OCEANBASE_PASSWORD=infini_rag_flow
|
||||
# The doc database of the OceanBase service to use
|
||||
OCEANBASE_DOC_DBNAME=ragflow_doc
|
||||
|
||||
# OceanBase container configuration
|
||||
OB_CLUSTER_NAME=${OB_CLUSTER_NAME:-ragflow}
|
||||
OB_TENANT_NAME=${OB_TENANT_NAME:-ragflow}
|
||||
OB_SYS_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
|
||||
OB_TENANT_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
|
||||
OB_MEMORY_LIMIT=${OB_MEMORY_LIMIT:-10G}
|
||||
OB_SYSTEM_MEMORY=${OB_SYSTEM_MEMORY:-2G}
|
||||
OB_DATAFILE_SIZE=${OB_DATAFILE_SIZE:-20G}
|
||||
OB_LOG_DISK_SIZE=${OB_LOG_DISK_SIZE:-20G}
|
||||
|
||||
# The password for MySQL.
|
||||
MYSQL_PASSWORD=infini_rag_flow
|
||||
# The hostname where the MySQL service is exposed
|
||||
|
|
|
|||
|
|
@ -138,6 +138,15 @@ The [.env](./.env) file contains important environment variables for Docker.
|
|||
- `password`: The password for MinIO.
|
||||
- `host`: The MinIO serving IP *and* port inside the Docker container. Defaults to `minio:9000`.
|
||||
|
||||
- `oceanbase`
|
||||
- `scheme`: The connection scheme. Set to `mysql` to use mysql config, or other values to use config below.
|
||||
- `config`:
|
||||
- `db_name`: The OceanBase database name.
|
||||
- `user`: The username for OceanBase.
|
||||
- `password`: The password for OceanBase.
|
||||
- `host`: The hostname of the OceanBase service.
|
||||
- `port`: The port of OceanBase.
|
||||
|
||||
- `oss`
|
||||
- `access_key`: The access key ID used to authenticate requests to the OSS service.
|
||||
- `secret_key`: The secret access key used to authenticate requests to the OSS service.
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ services:
|
|||
infinity:
|
||||
profiles:
|
||||
- infinity
|
||||
image: infiniflow/infinity:v0.6.5
|
||||
image: infiniflow/infinity:v0.6.6
|
||||
volumes:
|
||||
- infinity_data:/var/infinity
|
||||
- ./infinity_conf.toml:/infinity_conf.toml
|
||||
|
|
@ -96,6 +96,31 @@ services:
|
|||
retries: 120
|
||||
restart: on-failure
|
||||
|
||||
oceanbase:
|
||||
profiles:
|
||||
- oceanbase
|
||||
image: oceanbase/oceanbase-ce:4.4.1.0-100000032025101610
|
||||
volumes:
|
||||
- ./oceanbase/data:/root/ob
|
||||
- ./oceanbase/conf:/root/.obd/cluster
|
||||
- ./oceanbase/init.d:/root/boot/init.d
|
||||
ports:
|
||||
- ${OCEANBASE_PORT:-2881}:2881
|
||||
env_file: .env
|
||||
environment:
|
||||
- MODE=normal
|
||||
- OB_SERVER_IP=127.0.0.1
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
healthcheck:
|
||||
test: [ 'CMD-SHELL', 'obclient -h127.0.0.1 -P2881 -uroot@${OB_TENANT_NAME:-ragflow} -p${OB_TENANT_PASSWORD:-infini_rag_flow} -e "CREATE DATABASE IF NOT EXISTS ${OCEANBASE_DOC_DBNAME:-ragflow_doc};"' ]
|
||||
interval: 10s
|
||||
retries: 30
|
||||
start_period: 30s
|
||||
timeout: 10s
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
|
||||
sandbox-executor-manager:
|
||||
profiles:
|
||||
- sandbox
|
||||
|
|
@ -154,7 +179,7 @@ services:
|
|||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
|
||||
command: server --console-address ":9001" /data
|
||||
command: ["server", "--console-address", ":9001", "/data"]
|
||||
ports:
|
||||
- ${MINIO_PORT}:9000
|
||||
- ${MINIO_CONSOLE_PORT}:9001
|
||||
|
|
@ -176,7 +201,7 @@ services:
|
|||
redis:
|
||||
# swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/valkey/valkey:8
|
||||
image: valkey/valkey:8
|
||||
command: redis-server --requirepass ${REDIS_PASSWORD} --maxmemory 128mb --maxmemory-policy allkeys-lru
|
||||
command: ["redis-server", "--requirepass", "${REDIS_PASSWORD}", "--maxmemory", "128mb", "--maxmemory-policy", "allkeys-lru"]
|
||||
env_file: .env
|
||||
ports:
|
||||
- ${REDIS_PORT}:6379
|
||||
|
|
@ -256,6 +281,8 @@ volumes:
|
|||
driver: local
|
||||
infinity_data:
|
||||
driver: local
|
||||
ob_data:
|
||||
driver: local
|
||||
mysql_data:
|
||||
driver: local
|
||||
minio_data:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
[general]
|
||||
version = "0.6.5"
|
||||
version = "0.6.6"
|
||||
time_zone = "utc-8"
|
||||
|
||||
[network]
|
||||
|
|
|
|||
1
docker/oceanbase/init.d/vec_memory.sql
Normal file
1
docker/oceanbase/init.d/vec_memory.sql
Normal file
|
|
@ -0,0 +1 @@
|
|||
ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30;
|
||||
|
|
@ -28,6 +28,14 @@ os:
|
|||
infinity:
|
||||
uri: '${INFINITY_HOST:-infinity}:23817'
|
||||
db_name: 'default_db'
|
||||
oceanbase:
|
||||
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
|
||||
config:
|
||||
db_name: '${OCEANBASE_DOC_DBNAME:-test}'
|
||||
user: '${OCEANBASE_USER:-root@ragflow}'
|
||||
password: '${OCEANBASE_PASSWORD:-infini_rag_flow}'
|
||||
host: '${OCEANBASE_HOST:-oceanbase}'
|
||||
port: ${OCEANBASE_PORT:-2881}
|
||||
redis:
|
||||
db: 1
|
||||
password: '${REDIS_PASSWORD:-infini_rag_flow}'
|
||||
|
|
@ -142,5 +150,3 @@ user_default_llm:
|
|||
# secret_id: '${TENCENT_SECRET_ID}'
|
||||
# secret_key: '${TENCENT_SECRET_KEY}'
|
||||
# region: '${TENCENT_REGION}'
|
||||
# table_result_type: '1'
|
||||
# markdown_image_response_type: '1'
|
||||
|
|
|
|||
|
|
@ -7,25 +7,29 @@ slug: /release_notes
|
|||
|
||||
Key features, improvements and bug fixes in the latest releases.
|
||||
|
||||
## v0.22.1
|
||||
|
||||
Released on November 19, 2025.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- **Knowledge Base Embedding Models**: Fixed an issue where knowledge base embedding models became unavailable since v0.22.0.
|
||||
- **Document Parsing**: Fixing image merging issues.
|
||||
- **Chat History**: Fixed a bug where images and text were not correctly displayed together in historical chat records.
|
||||
|
||||
### Improvements
|
||||
|
||||
- **Agent**:
|
||||
- Added support for exporting Agent outputs in Word formats.
|
||||
- Introduced new list operations and enhanced the **Variable Aggregator** component capabilities.
|
||||
- **Data Sources**:
|
||||
- Expanded data source support to include S3-compatible storage services.
|
||||
- Added new integration support for JIRA.
|
||||
- **User Profile**: Optimized and beautified the layout of the personal center interface.
|
||||
- Agent:
|
||||
- Supports exporting Agent outputs in Word or Markdown formats.
|
||||
- Adds a **List operations** component.
|
||||
- Adds a **Variable aggregator** component.
|
||||
- Data sources:
|
||||
- Supports S3-compatible data sources, e.g., MinIO.
|
||||
- Adds data synchronization with JIRA.
|
||||
- Continues the redesign of the **Profile** page layouts.
|
||||
- Upgrades the Flask web framework from synchronous to asynchronous, increasing concurrency and preventing blocking issues caused when requesting upstream LLM services.
|
||||
|
||||
### Support new models
|
||||
### Fixed issues
|
||||
|
||||
- A v0.22.0 issue: Users failed to parse uploaded files or switch embedding model in a dataset containing parsed files using a built-in model from a `-full` RAGFlow edition.
|
||||
- Image concatenated in Word documents. [#11310](https://github.com/infiniflow/ragflow/pull/11310)
|
||||
- Mixed images and text were not correctly displayed in the chat history.
|
||||
|
||||
### Newly supported models
|
||||
|
||||
- Gemini 3 Pro Preview
|
||||
|
||||
|
|
@ -99,7 +103,7 @@ Released on October 15, 2025.
|
|||
- Redesigns RAGFlow's Login and Registration pages.
|
||||
- Upgrades RAGFlow's document engine Infinity to v0.6.0.
|
||||
|
||||
### Support new models
|
||||
### Newly supported models
|
||||
|
||||
- Tongyi Qwen 3 series
|
||||
- Claude Sonnet 4.5
|
||||
|
|
@ -122,7 +126,7 @@ Released on September 10, 2025.
|
|||
- **Execute SQL** component enhanced: Replaces the original variable reference component with a text input field, allowing users to write free-form SQL queries and reference variables. See [here](./guides/agent/agent_component_reference/execute_sql.md).
|
||||
- Chat: Re-enables **Reasoning** and **Cross-language search**.
|
||||
|
||||
### Support new models
|
||||
### Newly supported models
|
||||
|
||||
- Meituan LongCat
|
||||
- Kimi: kimi-k2-turbo-preview and kimi-k2-0905-preview
|
||||
|
|
@ -161,7 +165,7 @@ Released on August 27, 2025.
|
|||
- Improves Markdown file parsing, with AST support to avoid unintended chunking.
|
||||
- Enhances HTML parsing, supporting bs4-based HTML tag traversal.
|
||||
|
||||
### Support new models
|
||||
### Newly supported models
|
||||
|
||||
ZHIPU GLM-4.5
|
||||
|
||||
|
|
@ -222,7 +226,7 @@ Released on August 8, 2025.
|
|||
- The **Retrieval** component now supports the dynamic specification of dataset names using variables.
|
||||
- The user interface now includes a French language option.
|
||||
|
||||
### Support new models
|
||||
### Newly supported models
|
||||
|
||||
- GPT-5
|
||||
- Claude 4.1
|
||||
|
|
@ -286,7 +290,7 @@ Released on June 23, 2025.
|
|||
- Added support for models installed via Ollama or VLLM when creating a dataset through the API. [#8069](https://github.com/infiniflow/ragflow/pull/8069)
|
||||
- Enabled role-based authentication for S3 bucket access. [#8149](https://github.com/infiniflow/ragflow/pull/8149)
|
||||
|
||||
### Support new models
|
||||
### Newly supported models
|
||||
|
||||
- Qwen 3 Embedding. [#8184](https://github.com/infiniflow/ragflow/pull/8184)
|
||||
- Voyage Multimodal 3. [#7987](https://github.com/infiniflow/ragflow/pull/7987)
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ ragflow:
|
|||
infinity:
|
||||
image:
|
||||
repository: infiniflow/infinity
|
||||
tag: v0.6.5
|
||||
tag: v0.6.6
|
||||
pullPolicy: IfNotPresent
|
||||
pullSecrets: []
|
||||
storage:
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ dependencies = [
|
|||
"html-text==0.6.2",
|
||||
"httpx[socks]>=0.28.1,<0.29.0",
|
||||
"huggingface-hub>=0.25.0,<0.26.0",
|
||||
"infinity-sdk==0.6.5",
|
||||
"infinity-sdk==0.6.6",
|
||||
"infinity-emb>=0.0.66,<0.0.67",
|
||||
"itsdangerous==2.1.2",
|
||||
"json-repair==0.35.0",
|
||||
|
|
@ -149,6 +149,7 @@ dependencies = [
|
|||
"captcha>=0.7.1",
|
||||
"pip>=25.2",
|
||||
"pypandoc>=1.16",
|
||||
"pyobvector==0.2.18",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ def by_plaintext(filename, binary=None, from_page=0, to_page=100000, callback=No
|
|||
else:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT, llm_name=kwargs.get("layout_recognizer", ""), lang=kwargs.get("lang", "Chinese"))
|
||||
pdf_parser = VisionParser(vision_model=vision_model, **kwargs)
|
||||
|
||||
|
||||
sections, tables = pdf_parser(
|
||||
filename if not binary else binary,
|
||||
from_page=from_page,
|
||||
|
|
@ -504,7 +504,7 @@ class Markdown(MarkdownParser):
|
|||
|
||||
return images if images else None
|
||||
|
||||
def __call__(self, filename, binary=None, separate_tables=True,delimiter=None):
|
||||
def __call__(self, filename, binary=None, separate_tables=True, delimiter=None):
|
||||
if binary:
|
||||
encoding = find_codec(binary)
|
||||
txt = binary.decode(encoding, errors="ignore")
|
||||
|
|
@ -602,7 +602,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||
_SerializedRelationships.load_from_xml = load_from_xml_v2
|
||||
sections, tables = Docx()(filename, binary)
|
||||
|
||||
tables=vision_figure_parser_docx_wrapper(sections=sections,tbls=tables,callback=callback,**kwargs)
|
||||
tables = vision_figure_parser_docx_wrapper(sections=sections, tbls=tables, callback=callback, **kwargs)
|
||||
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
|
@ -653,18 +653,47 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||
|
||||
if name in ["tcadp", "docling", "mineru"]:
|
||||
parser_config["chunk_token_num"] = 0
|
||||
|
||||
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
||||
elif re.search(r"\.(csv|xlsx?)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
excel_parser = ExcelParser()
|
||||
if parser_config.get("html4excel"):
|
||||
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
||||
|
||||
# Check if tcadp_parser is selected for spreadsheet files
|
||||
layout_recognizer = parser_config.get("layout_recognize", "DeepDOC")
|
||||
if layout_recognizer == "TCADP Parser":
|
||||
table_result_type = parser_config.get("table_result_type", "1")
|
||||
markdown_image_response_type = parser_config.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
if not tcadp_parser.check_installation():
|
||||
callback(-1, "TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||
return res
|
||||
|
||||
# Determine file type based on extension
|
||||
file_type = "XLSX" if re.search(r"\.xlsx?$", filename, re.IGNORECASE) else "CSV"
|
||||
|
||||
sections, tables = tcadp_parser.parse_pdf(
|
||||
filepath=filename,
|
||||
binary=binary,
|
||||
callback=callback,
|
||||
output_dir=os.environ.get("TCADP_OUTPUT_DIR", ""),
|
||||
file_type=file_type
|
||||
)
|
||||
parser_config["chunk_token_num"] = 0
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
else:
|
||||
sections = [(_, "") for _ in excel_parser(binary) if _]
|
||||
parser_config["chunk_token_num"] = 12800
|
||||
# Default DeepDOC parser
|
||||
excel_parser = ExcelParser()
|
||||
if parser_config.get("html4excel"):
|
||||
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
||||
else:
|
||||
sections = [(_, "") for _ in excel_parser(binary) if _]
|
||||
parser_config["chunk_token_num"] = 12800
|
||||
|
||||
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
|
|
@ -676,7 +705,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128)))
|
||||
sections, tables = markdown_parser(filename, binary, separate_tables=False,delimiter=parser_config.get("delimiter", "\n!?;。;!?"))
|
||||
sections, tables = markdown_parser(filename, binary, separate_tables=False, delimiter=parser_config.get("delimiter", "\n!?;。;!?"))
|
||||
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import io
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
import trio
|
||||
|
|
@ -83,6 +84,7 @@ class ParserParam(ProcessParamBase):
|
|||
"output_format": "json",
|
||||
},
|
||||
"spreadsheet": {
|
||||
"parse_method": "deepdoc", # deepdoc/tcadp_parser
|
||||
"output_format": "html",
|
||||
"suffix": [
|
||||
"xls",
|
||||
|
|
@ -102,8 +104,10 @@ class ParserParam(ProcessParamBase):
|
|||
"output_format": "json",
|
||||
},
|
||||
"slides": {
|
||||
"parse_method": "deepdoc", # deepdoc/tcadp_parser
|
||||
"suffix": [
|
||||
"pptx",
|
||||
"ppt"
|
||||
],
|
||||
"output_format": "json",
|
||||
},
|
||||
|
|
@ -245,7 +249,12 @@ class Parser(ProcessBase):
|
|||
bboxes.append(box)
|
||||
elif conf.get("parse_method").lower() == "tcadp parser":
|
||||
# ADP is a document parsing tool using Tencent Cloud API
|
||||
tcadp_parser = TCADPParser()
|
||||
table_result_type = conf.get("table_result_type", "1")
|
||||
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
sections, _ = tcadp_parser.parse_pdf(
|
||||
filepath=name,
|
||||
binary=blob,
|
||||
|
|
@ -301,14 +310,86 @@ class Parser(ProcessBase):
|
|||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Spreadsheet.")
|
||||
conf = self._param.setups["spreadsheet"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
spreadsheet_parser = ExcelParser()
|
||||
if conf.get("output_format") == "html":
|
||||
htmls = spreadsheet_parser.html(blob, 1000000000)
|
||||
self.set_output("html", htmls[0])
|
||||
elif conf.get("output_format") == "json":
|
||||
self.set_output("json", [{"text": txt} for txt in spreadsheet_parser(blob) if txt])
|
||||
elif conf.get("output_format") == "markdown":
|
||||
self.set_output("markdown", spreadsheet_parser.markdown(blob))
|
||||
|
||||
parse_method = conf.get("parse_method", "deepdoc")
|
||||
|
||||
# Handle TCADP parser
|
||||
if parse_method.lower() == "tcadp parser":
|
||||
table_result_type = conf.get("table_result_type", "1")
|
||||
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
if not tcadp_parser.check_installation():
|
||||
raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||
|
||||
# Determine file type based on extension
|
||||
if re.search(r"\.xlsx?$", name, re.IGNORECASE):
|
||||
file_type = "XLSX"
|
||||
else:
|
||||
file_type = "CSV"
|
||||
|
||||
self.callback(0.2, f"Using TCADP parser for {file_type} file.")
|
||||
sections, tables = tcadp_parser.parse_pdf(
|
||||
filepath=name,
|
||||
binary=blob,
|
||||
callback=self.callback,
|
||||
file_type=file_type,
|
||||
file_start_page=1,
|
||||
file_end_page=1000
|
||||
)
|
||||
|
||||
# Process TCADP parser output based on configured output_format
|
||||
output_format = conf.get("output_format", "html")
|
||||
|
||||
if output_format == "html":
|
||||
# For HTML output, combine sections and tables into HTML
|
||||
html_content = ""
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
html_content += section + "\n"
|
||||
for table in tables:
|
||||
if table:
|
||||
html_content += table + "\n"
|
||||
|
||||
self.set_output("html", html_content)
|
||||
|
||||
elif output_format == "json":
|
||||
# For JSON output, create a list of text items
|
||||
result = []
|
||||
# Add sections as text
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
result.append({"text": section})
|
||||
# Add tables as text
|
||||
for table in tables:
|
||||
if table:
|
||||
result.append({"text": table})
|
||||
|
||||
self.set_output("json", result)
|
||||
|
||||
elif output_format == "markdown":
|
||||
# For markdown output, combine into markdown
|
||||
md_content = ""
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
md_content += section + "\n\n"
|
||||
for table in tables:
|
||||
if table:
|
||||
md_content += table + "\n\n"
|
||||
|
||||
self.set_output("markdown", md_content)
|
||||
else:
|
||||
# Default DeepDOC parser
|
||||
spreadsheet_parser = ExcelParser()
|
||||
if conf.get("output_format") == "html":
|
||||
htmls = spreadsheet_parser.html(blob, 1000000000)
|
||||
self.set_output("html", htmls[0])
|
||||
elif conf.get("output_format") == "json":
|
||||
self.set_output("json", [{"text": txt} for txt in spreadsheet_parser(blob) if txt])
|
||||
elif conf.get("output_format") == "markdown":
|
||||
self.set_output("markdown", spreadsheet_parser.markdown(blob))
|
||||
|
||||
def _word(self, name, blob):
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Word Processor Document")
|
||||
|
|
@ -326,22 +407,69 @@ class Parser(ProcessBase):
|
|||
self.set_output("markdown", markdown_text)
|
||||
|
||||
def _slides(self, name, blob):
|
||||
from deepdoc.parser.ppt_parser import RAGFlowPptParser as ppt_parser
|
||||
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a PowerPoint Document")
|
||||
|
||||
conf = self._param.setups["slides"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
|
||||
ppt_parser = ppt_parser()
|
||||
txts = ppt_parser(blob, 0, 100000, None)
|
||||
parse_method = conf.get("parse_method", "deepdoc")
|
||||
|
||||
sections = [{"text": section} for section in txts if section.strip()]
|
||||
# Handle TCADP parser
|
||||
if parse_method.lower() == "tcadp parser":
|
||||
table_result_type = conf.get("table_result_type", "1")
|
||||
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
if not tcadp_parser.check_installation():
|
||||
raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||
|
||||
# json
|
||||
assert conf.get("output_format") == "json", "have to be json for ppt"
|
||||
if conf.get("output_format") == "json":
|
||||
self.set_output("json", sections)
|
||||
# Determine file type based on extension
|
||||
if re.search(r"\.pptx?$", name, re.IGNORECASE):
|
||||
file_type = "PPTX"
|
||||
else:
|
||||
file_type = "PPT"
|
||||
|
||||
self.callback(0.2, f"Using TCADP parser for {file_type} file.")
|
||||
|
||||
sections, tables = tcadp_parser.parse_pdf(
|
||||
filepath=name,
|
||||
binary=blob,
|
||||
callback=self.callback,
|
||||
file_type=file_type,
|
||||
file_start_page=1,
|
||||
file_end_page=1000
|
||||
)
|
||||
|
||||
# Process TCADP parser output - PPT only supports json format
|
||||
output_format = conf.get("output_format", "json")
|
||||
if output_format == "json":
|
||||
# For JSON output, create a list of text items
|
||||
result = []
|
||||
# Add sections as text
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
result.append({"text": section})
|
||||
# Add tables as text
|
||||
for table in tables:
|
||||
if table:
|
||||
result.append({"text": table})
|
||||
|
||||
self.set_output("json", result)
|
||||
else:
|
||||
# Default DeepDOC parser (supports .pptx format)
|
||||
from deepdoc.parser.ppt_parser import RAGFlowPptParser as ppt_parser
|
||||
|
||||
ppt_parser = ppt_parser()
|
||||
txts = ppt_parser(blob, 0, 100000, None)
|
||||
|
||||
sections = [{"text": section} for section in txts if section.strip()]
|
||||
|
||||
# json
|
||||
assert conf.get("output_format") == "json", "have to be json for ppt"
|
||||
if conf.get("output_format") == "json":
|
||||
self.set_output("json", sections)
|
||||
|
||||
def _markdown(self, name, blob):
|
||||
from functools import reduce
|
||||
|
|
@ -579,6 +707,7 @@ class Parser(ProcessBase):
|
|||
"video": self._video,
|
||||
"email": self._email,
|
||||
}
|
||||
|
||||
try:
|
||||
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -234,7 +234,11 @@ class CoHereRerank(Base):
|
|||
def __init__(self, key, model_name, base_url=None):
|
||||
from cohere import Client
|
||||
|
||||
self.client = Client(api_key=key, base_url=base_url)
|
||||
# Only pass base_url if it's a non-empty string, otherwise use default Cohere API endpoint
|
||||
client_kwargs = {"api_key": key}
|
||||
if base_url and base_url.strip():
|
||||
client_kwargs["base_url"] = base_url
|
||||
self.client = Client(**client_kwargs)
|
||||
self.model_name = model_name.split("___")[0]
|
||||
|
||||
def similarity(self, query: str, texts: list):
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ class FulltextQueryer:
|
|||
return txt
|
||||
|
||||
def question(self, txt, tbl="qa", min_match: float = 0.6):
|
||||
original_query = txt
|
||||
txt = FulltextQueryer.add_space_between_eng_zh(txt)
|
||||
txt = re.sub(
|
||||
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+",
|
||||
|
|
@ -127,7 +128,7 @@ class FulltextQueryer:
|
|||
q.append(txt)
|
||||
query = " ".join(q)
|
||||
return MatchTextExpr(
|
||||
self.query_fields, query, 100
|
||||
self.query_fields, query, 100, {"original_query": original_query}
|
||||
), keywords
|
||||
|
||||
def need_fine_grained_tokenize(tk):
|
||||
|
|
@ -212,7 +213,7 @@ class FulltextQueryer:
|
|||
if not query:
|
||||
query = otxt
|
||||
return MatchTextExpr(
|
||||
self.query_fields, query, 100, {"minimum_should_match": min_match}
|
||||
self.query_fields, query, 100, {"minimum_should_match": min_match, "original_query": original_query}
|
||||
), keywords
|
||||
return None, keywords
|
||||
|
||||
|
|
@ -259,6 +260,7 @@ class FulltextQueryer:
|
|||
content_tks = [c.strip() for c in content_tks.strip() if c.strip()]
|
||||
tks_w = self.tw.weights(content_tks, preprocess=False)
|
||||
|
||||
origin_keywords = keywords.copy()
|
||||
keywords = [f'"{k.strip()}"' for k in keywords]
|
||||
for tk, w in sorted(tks_w, key=lambda x: x[1] * -1)[:keywords_topn]:
|
||||
tk_syns = self.syn.lookup(tk)
|
||||
|
|
@ -274,4 +276,4 @@ class FulltextQueryer:
|
|||
keywords.append(f"{tk}^{w}")
|
||||
|
||||
return MatchTextExpr(self.query_fields, " ".join(keywords), 100,
|
||||
{"minimum_should_match": min(3, len(keywords) // 10)})
|
||||
{"minimum_should_match": min(3, len(keywords) / 10), "original_query": " ".join(origin_keywords)})
|
||||
|
|
|
|||
1562
rag/utils/ob_conn.py
Normal file
1562
rag/utils/ob_conn.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -140,6 +140,16 @@
|
|||
<path d="M0 0h1024v1024H0z" opacity=".01"></path>
|
||||
<path d="M867.072 141.184H156.032a32 32 0 0 0 0 64h711.04a32 32 0 0 0 0-64z m0.832 226.368H403.2a32 32 0 0 0 0 64h464.704a32 32 0 0 0 0-64zM403.2 573.888h464.704a32 32 0 0 1 0 64H403.2a32 32 0 0 1 0-64z m464.704 226.368H156.864a32 32 0 0 0 0 64h711.04a32 32 0 0 0 0-64zM137.472 367.552v270.336l174.528-122.24-174.528-148.096z" ></path>
|
||||
</symbol>` +
|
||||
` <symbol id="icon-a-listoperations" viewBox="0 0 1024 1024">
|
||||
<path d="M341.376 96a32 32 0 0 1 0 64h-128a10.688 10.688 0 0 0-10.688 10.688v682.624a10.752 10.752 0 0 0 10.688 10.688h128a32 32 0 0 1 0 64h-128a74.688 74.688 0 0 1-74.688-74.688V170.688A74.688 74.688 0 0 1 213.376 96h128z m469.312 0a74.688 74.688 0 0 1 74.688 74.688v682.624a74.752 74.752 0 0 1-74.688 74.688h-128a32 32 0 1 1 0-64h128a10.752 10.752 0 0 0 10.688-10.688V170.688a10.752 10.752 0 0 0-10.688-10.688h-128a32 32 0 1 1 0-64h128zM357.248 464.256a48 48 0 0 1 0 95.488l-4.928 0.256H352a48 48 0 0 1 0-96h0.32l4.928 0.256z m155.072-0.256a48 48 0 1 1 0 96H512a48 48 0 0 1 0-96h0.32z m160 0a48 48 0 0 1 0 96H672a48 48 0 0 1 0-96h0.32z" ></path>
|
||||
</symbol>` +
|
||||
`<symbol id="icon-aggregator" viewBox="0 0 1024 1024">
|
||||
<path d="M949.312 533.312a32 32 0 0 1-9.344 22.592l-170.688 170.688a32 32 0 0 1-45.248-45.248l116.032-116.032H478.208l-10.176-0.128a202.688 202.688 0 0 1-135.36-59.264L41.344 214.592a32 32 0 1 1 45.312-45.248l291.264 291.328 10.24 9.344a138.688 138.688 0 0 0 89.344 31.296h362.56L724.032 385.28a32 32 0 0 1 45.248-45.248l170.688 170.624a32 32 0 0 1 9.344 22.656zM299.968 638.656a32 32 0 0 1 0 45.248L86.656 897.28a32 32 0 0 1-45.312-45.248L254.72 638.72a32 32 0 0 1 45.312 0z" ></path>
|
||||
</symbol>` +
|
||||
`<symbol id="icon-a-ariableassigner" viewBox="0 0 1024 1024">
|
||||
<path d="M509.056 64c123.136 0 235.072 48.512 317.12 130.56l-41.024 37.312C714.24 161.024 617.216 119.936 509.056 119.936a391.808 391.808 0 1 0 0 783.552 392.448 392.448 0 0 0 294.784-134.272l41.024 37.312c-82.048 93.248-201.472 149.248-335.808 149.248-246.272 3.712-447.744-197.76-447.744-444.032S262.784 64 509.056 64z m-63.424 186.56a29.184 29.184 0 0 1 14.912 14.912l160.448 444.032c3.712 14.912-3.712 26.112-18.56 33.536-14.976 3.776-26.24-3.648-33.664-14.848l-48.512-149.248H341.12l-59.712 149.248a27.648 27.648 0 0 1-33.6 14.848c-14.912-3.712-18.56-18.624-14.848-33.536l179.008-444.032c3.776-11.136 22.4-18.624 33.6-14.912zM889.6 530.432c14.976 0 26.176 11.2 26.176 26.112a25.472 25.472 0 0 1-26.176 26.112h-212.608a25.472 25.472 0 0 1-26.112-26.112c0-14.912 11.2-26.112 26.112-26.112H889.6z m-529.792 0h141.824L434.432 351.36l-74.624 179.2zM889.6 411.008c14.912 0 26.176 11.2 26.176 26.112a25.536 25.536 0 0 1-26.176 26.112h-212.608a25.536 25.536 0 0 1-26.112-26.112c0-14.912 11.2-26.112 26.112-26.112H889.6z" ></path>
|
||||
</symbol>
|
||||
` +
|
||||
'</svg>'),
|
||||
((h) => {
|
||||
var a = (l = (l = document.getElementsByTagName('script'))[
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ const JsonSchemaVisualizer: FC<JsonSchemaVisualizerProps> = ({
|
|||
|
||||
try {
|
||||
const parsedJson = JSON.parse(value);
|
||||
if (onChange) {
|
||||
if (onChange && typeof parsedJson !== 'number') {
|
||||
onChange(parsedJson);
|
||||
}
|
||||
} catch (_error) {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ export default {
|
|||
portugueseBr: 'Portuguese (Brazil)',
|
||||
chinese: 'Simplified Chinese',
|
||||
traditionalChinese: 'Traditional Chinese',
|
||||
russian: 'Russian',
|
||||
language: 'Language',
|
||||
languageMessage: 'Please input your language!',
|
||||
languagePlaceholder: 'select your language',
|
||||
|
|
@ -1645,6 +1646,7 @@ The variable aggregation node (originally the variable assignment node) is a cru
|
|||
beginInputTip:
|
||||
'By defining input parameters, this content can be accessed by other components in subsequent processes.',
|
||||
query: 'Query variables',
|
||||
queryRequired: 'Query is required',
|
||||
queryTip: 'Select the variable you want to use',
|
||||
agent: 'Agent',
|
||||
addAgent: 'Add Agent',
|
||||
|
|
@ -1751,6 +1753,8 @@ The variable aggregation node (originally the variable assignment node) is a cru
|
|||
The Indexer will store the content in the corresponding data structures for the selected methods.`,
|
||||
// file: 'File',
|
||||
parserMethod: 'PDF parser',
|
||||
tableResultType: 'Table Result Type',
|
||||
markdownImageResponseType: 'Markdown Image Response Type',
|
||||
// systemPrompt: 'System Prompt',
|
||||
systemPromptPlaceholder:
|
||||
'Enter system prompt for image analysis, if empty the system default value will be used',
|
||||
|
|
@ -1854,7 +1858,7 @@ Important structured information may include: names, dates, locations, events, k
|
|||
desc: 'Descending',
|
||||
},
|
||||
variableAssignerLogicalOperatorOptions: {
|
||||
overwrite: 'Overwrite',
|
||||
overwrite: 'Overwritten By',
|
||||
clear: 'Clear',
|
||||
set: 'Set',
|
||||
'+=': 'Add',
|
||||
|
|
@ -1933,6 +1937,7 @@ Important structured information may include: names, dates, locations, events, k
|
|||
japanese: 'Japanese',
|
||||
korean: 'Korean',
|
||||
vietnamese: 'Vietnamese',
|
||||
russian: 'Russian',
|
||||
},
|
||||
pagination: {
|
||||
total: 'Total {{total}}',
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1549,6 +1549,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
|||
task: '任务',
|
||||
beginInputTip: '通过定义输入参数,此内容可以被后续流程中的其他组件访问。',
|
||||
query: '查询变量',
|
||||
queryRequired: '查询变量是必填项',
|
||||
queryTip: '选择您想要使用的变量',
|
||||
agent: '智能体',
|
||||
addAgent: '添加智能体',
|
||||
|
|
@ -1628,6 +1629,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
|||
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
||||
filenameEmbdWeight: '文件名嵌入权重',
|
||||
parserMethod: '解析方法',
|
||||
tableResultType: '表格返回形式',
|
||||
markdownImageResponseType: '图片返回形式',
|
||||
systemPromptPlaceholder:
|
||||
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
|
||||
exportJson: '导出 JSON',
|
||||
|
|
|
|||
|
|
@ -11,11 +11,12 @@ export function DataOperationsNode({
|
|||
}: NodeProps<BaseNode<DataOperationsFormSchemaType>>) {
|
||||
const { data } = props;
|
||||
const { t } = useTranslation();
|
||||
const operations = data.form?.operations;
|
||||
|
||||
return (
|
||||
<RagNode {...props}>
|
||||
<LabelCard>
|
||||
{t(`flow.operationsOptions.${camelCase(data.form?.operations)}`)}
|
||||
{operations && t(`flow.operationsOptions.${camelCase(operations)}`)}
|
||||
</LabelCard>
|
||||
</RagNode>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -169,6 +169,7 @@ export const initialParserValues = {
|
|||
{
|
||||
fileFormat: FileType.Spreadsheet,
|
||||
output_format: SpreadsheetOutputFormat.Html,
|
||||
parse_method: ParseDocumentType.DeepDOC,
|
||||
},
|
||||
{
|
||||
fileFormat: FileType.Image,
|
||||
|
|
@ -192,6 +193,7 @@ export const initialParserValues = {
|
|||
{
|
||||
fileFormat: FileType.PowerPoint,
|
||||
output_format: PptOutputFormat.Json,
|
||||
parse_method: ParseDocumentType.DeepDOC,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
|
@ -243,7 +245,7 @@ export const FileTypeSuffixMap = {
|
|||
[FileType.Email]: ['eml', 'msg'],
|
||||
[FileType.TextMarkdown]: ['md', 'markdown', 'mdx', 'txt'],
|
||||
[FileType.Docx]: ['doc', 'docx'],
|
||||
[FileType.PowerPoint]: ['pptx'],
|
||||
[FileType.PowerPoint]: ['pptx', 'ppt'],
|
||||
[FileType.Video]: ['mp4', 'avi', 'mkv'],
|
||||
[FileType.Audio]: [
|
||||
'da',
|
||||
|
|
|
|||
|
|
@ -22,12 +22,13 @@ import { Switch } from '@/components/ui/switch';
|
|||
import { LlmModelType } from '@/constants/knowledge';
|
||||
import { useFindLlmByUuid } from '@/hooks/use-llm-request';
|
||||
import { zodResolver } from '@hookform/resolvers/zod';
|
||||
import { memo, useEffect, useMemo } from 'react';
|
||||
import { memo, useCallback, useEffect, useMemo } from 'react';
|
||||
import { useForm, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { z } from 'zod';
|
||||
import {
|
||||
AgentExceptionMethod,
|
||||
AgentStructuredOutputField,
|
||||
NodeHandleId,
|
||||
VariableType,
|
||||
} from '../../constant';
|
||||
|
|
@ -127,6 +128,17 @@ function AgentForm({ node }: INextOperatorForm) {
|
|||
handleStructuredOutputDialogOk,
|
||||
} = useShowStructuredOutputDialog(node?.id);
|
||||
|
||||
const updateNodeForm = useGraphStore((state) => state.updateNodeForm);
|
||||
|
||||
const handleShowStructuredOutput = useCallback(
|
||||
(val: boolean) => {
|
||||
if (node?.id && val) {
|
||||
updateNodeForm(node?.id, {}, ['outputs', AgentStructuredOutputField]);
|
||||
}
|
||||
},
|
||||
[node?.id, updateNodeForm],
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (exceptionMethod !== AgentExceptionMethod.Goto) {
|
||||
if (node?.id) {
|
||||
|
|
@ -293,7 +305,10 @@ function AgentForm({ node }: INextOperatorForm) {
|
|||
<Switch
|
||||
id="airplane-mode"
|
||||
checked={field.value}
|
||||
onCheckedChange={field.onChange}
|
||||
onCheckedChange={(val) => {
|
||||
handleShowStructuredOutput(val);
|
||||
field.onChange(val);
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -17,20 +17,13 @@ export function useWatchFormChange(id?: string, form?: UseFormReturn<any>) {
|
|||
prompts: [{ role: PromptRole.User, content: values.prompts }],
|
||||
};
|
||||
|
||||
if (values.showStructuredOutput) {
|
||||
nextValues = {
|
||||
...nextValues,
|
||||
outputs: {
|
||||
...values.outputs,
|
||||
[AgentStructuredOutputField]:
|
||||
values[AgentStructuredOutputField] ?? {},
|
||||
},
|
||||
};
|
||||
} else {
|
||||
if (!values.showStructuredOutput) {
|
||||
nextValues = {
|
||||
...nextValues,
|
||||
outputs: omit(values.outputs, [AgentStructuredOutputField]),
|
||||
};
|
||||
} else {
|
||||
nextValues = omit(nextValues, 'outputs');
|
||||
}
|
||||
updateNodeForm(id, nextValues);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,17 +7,24 @@ export type FormListHeaderProps = {
|
|||
label: ReactNode;
|
||||
tooltip?: string;
|
||||
onClick?: () => void;
|
||||
disabled?: boolean;
|
||||
};
|
||||
|
||||
export function DynamicFormHeader({
|
||||
label,
|
||||
tooltip,
|
||||
onClick,
|
||||
disabled = false,
|
||||
}: FormListHeaderProps) {
|
||||
return (
|
||||
<div className="flex items-center justify-between">
|
||||
<FormLabel tooltip={tooltip}>{label}</FormLabel>
|
||||
<Button variant={'ghost'} type="button" onClick={onClick}>
|
||||
<Button
|
||||
variant={'ghost'}
|
||||
type="button"
|
||||
onClick={onClick}
|
||||
disabled={disabled}
|
||||
>
|
||||
<Plus />
|
||||
</Button>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@ import { Button } from '@/components/ui/button';
|
|||
import { X } from 'lucide-react';
|
||||
import { useFieldArray, useFormContext } from 'react-hook-form';
|
||||
import { JsonSchemaDataType } from '../../constant';
|
||||
import {
|
||||
flatOptions,
|
||||
useFilterQueryVariableOptionsByTypes,
|
||||
} from '../../hooks/use-get-begin-query';
|
||||
import { DynamicFormHeader, FormListHeaderProps } from './dynamic-fom-header';
|
||||
import { QueryVariable } from './query-variable';
|
||||
|
||||
|
|
@ -16,6 +20,10 @@ export function QueryVariableList({
|
|||
const form = useFormContext();
|
||||
const name = 'query';
|
||||
|
||||
let options = useFilterQueryVariableOptionsByTypes(types);
|
||||
|
||||
const secondOptions = flatOptions(options);
|
||||
|
||||
const { fields, remove, append } = useFieldArray({
|
||||
name: name,
|
||||
control: form.control,
|
||||
|
|
@ -26,14 +34,15 @@ export function QueryVariableList({
|
|||
<DynamicFormHeader
|
||||
label={label}
|
||||
tooltip={tooltip}
|
||||
onClick={() => append({ input: '' })}
|
||||
onClick={() => append({ input: secondOptions.at(0)?.value })}
|
||||
disabled={!secondOptions.length}
|
||||
></DynamicFormHeader>
|
||||
<div className="space-y-5">
|
||||
{fields.map((field, index) => {
|
||||
const nameField = `${name}.${index}.input`;
|
||||
|
||||
return (
|
||||
<div key={field.id} className="flex items-center gap-2">
|
||||
<div key={field.id} className="flex gap-2">
|
||||
<QueryVariable
|
||||
name={nameField}
|
||||
hideLabel
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { Form } from '@/components/ui/form';
|
|||
import { Separator } from '@/components/ui/separator';
|
||||
import { buildOptions } from '@/utils/form';
|
||||
import { zodResolver } from '@hookform/resolvers/zod';
|
||||
import { t } from 'i18next';
|
||||
import { memo } from 'react';
|
||||
import { useForm, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
|
@ -25,7 +26,11 @@ import { SelectKeys } from './select-keys';
|
|||
import { Updates } from './updates';
|
||||
|
||||
export const RetrievalPartialSchema = {
|
||||
query: z.array(z.object({ input: z.string().optional() })),
|
||||
query: z.array(
|
||||
z.object({
|
||||
input: z.string().min(1, { message: t('flow.queryRequired') }),
|
||||
}),
|
||||
),
|
||||
operations: z.string(),
|
||||
select_keys: z.array(z.object({ name: z.string().optional() })).optional(),
|
||||
remove_keys: z.array(z.object({ name: z.string().optional() })).optional(),
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@ import { OutputFormatFormField } from './common-form-fields';
|
|||
import { EmailFormFields } from './email-form-fields';
|
||||
import { ImageFormFields } from './image-form-fields';
|
||||
import { PdfFormFields } from './pdf-form-fields';
|
||||
import { PptFormFields } from './ppt-form-fields';
|
||||
import { SpreadsheetFormFields } from './spreadsheet-form-fields';
|
||||
import { buildFieldNameWithPrefix } from './utils';
|
||||
import { AudioFormFields, VideoFormFields } from './video-form-fields';
|
||||
|
||||
|
|
@ -41,6 +43,8 @@ const outputList = buildOutputList(initialParserValues.outputs);
|
|||
|
||||
const FileFormatWidgetMap = {
|
||||
[FileType.PDF]: PdfFormFields,
|
||||
[FileType.Spreadsheet]: SpreadsheetFormFields,
|
||||
[FileType.PowerPoint]: PptFormFields,
|
||||
[FileType.Video]: VideoFormFields,
|
||||
[FileType.Audio]: AudioFormFields,
|
||||
[FileType.Email]: EmailFormFields,
|
||||
|
|
@ -65,6 +69,8 @@ export const FormSchema = z.object({
|
|||
fields: z.array(z.string()).optional(),
|
||||
llm_id: z.string().optional(),
|
||||
system_prompt: z.string().optional(),
|
||||
table_result_type: z.string().optional(),
|
||||
markdown_image_response_type: z.string().optional(),
|
||||
}),
|
||||
),
|
||||
});
|
||||
|
|
@ -184,6 +190,8 @@ const ParserForm = ({ node }: INextOperatorForm) => {
|
|||
lang: '',
|
||||
fields: [],
|
||||
llm_id: '',
|
||||
table_result_type: '',
|
||||
markdown_image_response_type: '',
|
||||
});
|
||||
}, [append]);
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,13 +1,30 @@
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
import {
  SelectWithSearch,
  SelectWithSearchFlagOptionType,
} from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { isEmpty } from 'lodash';
import { useEffect, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
import { useSetInitialLanguage } from './use-set-initial-language';
import { buildFieldNameWithPrefix } from './utils';

const tableResultTypeOptions: SelectWithSearchFlagOptionType[] = [
  { label: 'Markdown', value: '0' },
  { label: 'HTML', value: '1' },
];

const markdownImageResponseTypeOptions: SelectWithSearchFlagOptionType[] = [
  { label: 'URL', value: '0' },
  { label: 'Text', value: '1' },
];

export function PdfFormFields({ prefix }: CommonProps) {
  const { t } = useTranslation();
  const form = useFormContext();

  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);

@@ -25,6 +42,12 @@ export function PdfFormFields({ prefix }: CommonProps) {
    );
  }, [parseMethod]);

  const tcadpOptionsShown = useMemo(() => {
    return (
      !isEmpty(parseMethod) && parseMethod === ParseDocumentType.TCADPParser
    );
  }, [parseMethod]);

  useSetInitialLanguage({ prefix, languageShown });

  useEffect(() => {

@@ -36,10 +59,68 @@ export function PdfFormFields({ prefix }: CommonProps) {
    }
  }, [form, parseMethodName]);

  // Set default values for TCADP options when TCADP is selected
  useEffect(() => {
    if (tcadpOptionsShown) {
      const tableResultTypeName = buildFieldNameWithPrefix(
        'table_result_type',
        prefix,
      );
      const markdownImageResponseTypeName = buildFieldNameWithPrefix(
        'markdown_image_response_type',
        prefix,
      );

      if (isEmpty(form.getValues(tableResultTypeName))) {
        form.setValue(tableResultTypeName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
      if (isEmpty(form.getValues(markdownImageResponseTypeName))) {
        form.setValue(markdownImageResponseTypeName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
    }
  }, [tcadpOptionsShown, form, prefix]);

  return (
    <>
      <ParserMethodFormField prefix={prefix}></ParserMethodFormField>
      {languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
      {tcadpOptionsShown && (
        <>
          <RAGFlowFormItem
            name={buildFieldNameWithPrefix('table_result_type', prefix)}
            label={t('flow.tableResultType') || '表格返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={tableResultTypeOptions}
              ></SelectWithSearch>
            )}
          </RAGFlowFormItem>
          <RAGFlowFormItem
            name={buildFieldNameWithPrefix(
              'markdown_image_response_type',
              prefix,
            )}
            label={t('flow.markdownImageResponseType') || '图片返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={markdownImageResponseTypeOptions}
              ></SelectWithSearch>
            )}
          </RAGFlowFormItem>
        </>
      )}
    </>
  );
}
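Given the option lists above, the default value '1' written by the effect corresponds to HTML for tables and Text for images. The field paths come from buildFieldNameWithPrefix in './utils'; a plausible minimal shape of that helper (an assumption for illustration, not the repository's implementation) is:

```ts
// Hypothetical sketch: prefix a field name with a nested react-hook-form path
// such as 'setups.0', so getValues/setValue resolve the right array entry.
function buildFieldNameWithPrefix(name: string, prefix?: string): string {
  return prefix ? `${prefix}.${name}` : name;
}

// buildFieldNameWithPrefix('table_result_type', 'setups.0')
//   -> 'setups.0.table_result_type'
```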
125  web/src/pages/agent/form/parser-form/ppt-form-fields.tsx  Normal file

@@ -0,0 +1,125 @@
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
import {
  SelectWithSearch,
  SelectWithSearchFlagOptionType,
} from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { isEmpty } from 'lodash';
import { useEffect, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
import { buildFieldNameWithPrefix } from './utils';

const tableResultTypeOptions: SelectWithSearchFlagOptionType[] = [
  { label: 'Markdown', value: '0' },
  { label: 'HTML', value: '1' },
];

const markdownImageResponseTypeOptions: SelectWithSearchFlagOptionType[] = [
  { label: 'URL', value: '0' },
  { label: 'Text', value: '1' },
];

export function PptFormFields({ prefix }: CommonProps) {
  const { t } = useTranslation();
  const form = useFormContext();

  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);

  const parseMethod = useWatch({
    name: parseMethodName,
  });

  // PPT only supports DeepDOC and TCADPParser
  const optionsWithoutLLM = [
    { label: ParseDocumentType.DeepDOC, value: ParseDocumentType.DeepDOC },
    {
      label: ParseDocumentType.TCADPParser,
      value: ParseDocumentType.TCADPParser,
    },
  ];

  const tcadpOptionsShown = useMemo(() => {
    return (
      !isEmpty(parseMethod) && parseMethod === ParseDocumentType.TCADPParser
    );
  }, [parseMethod]);

  useEffect(() => {
    if (isEmpty(form.getValues(parseMethodName))) {
      form.setValue(parseMethodName, ParseDocumentType.DeepDOC, {
        shouldValidate: true,
        shouldDirty: true,
      });
    }
  }, [form, parseMethodName]);

  // Set default values for TCADP options when TCADP is selected
  useEffect(() => {
    if (tcadpOptionsShown) {
      const tableResultTypeName = buildFieldNameWithPrefix(
        'table_result_type',
        prefix,
      );
      const markdownImageResponseTypeName = buildFieldNameWithPrefix(
        'markdown_image_response_type',
        prefix,
      );

      if (isEmpty(form.getValues(tableResultTypeName))) {
        form.setValue(tableResultTypeName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
      if (isEmpty(form.getValues(markdownImageResponseTypeName))) {
        form.setValue(markdownImageResponseTypeName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
    }
  }, [tcadpOptionsShown, form, prefix]);

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={optionsWithoutLLM}
      ></ParserMethodFormField>
      {tcadpOptionsShown && (
        <>
          <RAGFlowFormItem
            name={buildFieldNameWithPrefix('table_result_type', prefix)}
            label={t('flow.tableResultType') || '表格返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={tableResultTypeOptions}
              ></SelectWithSearch>
            )}
          </RAGFlowFormItem>
          <RAGFlowFormItem
            name={buildFieldNameWithPrefix(
              'markdown_image_response_type',
              prefix,
            )}
            label={t('flow.markdownImageResponseType') || '图片返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={markdownImageResponseTypeOptions}
              ></SelectWithSearch>
            )}
          </RAGFlowFormItem>
        </>
      )}
    </>
  );
}
125  web/src/pages/agent/form/parser-form/spreadsheet-form-fields.tsx  Normal file

@@ -0,0 +1,125 @@
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
import {
  SelectWithSearch,
  SelectWithSearchFlagOptionType,
} from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { isEmpty } from 'lodash';
import { useEffect, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
import { buildFieldNameWithPrefix } from './utils';

const tableResultTypeOptions: SelectWithSearchFlagOptionType[] = [
  { label: 'Markdown', value: '0' },
  { label: 'HTML', value: '1' },
];

const markdownImageResponseTypeOptions: SelectWithSearchFlagOptionType[] = [
  { label: 'URL', value: '0' },
  { label: 'Text', value: '1' },
];

export function SpreadsheetFormFields({ prefix }: CommonProps) {
  const { t } = useTranslation();
  const form = useFormContext();

  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);

  const parseMethod = useWatch({
    name: parseMethodName,
  });

  // Spreadsheet only supports DeepDOC and TCADPParser
  const optionsWithoutLLM = [
    { label: ParseDocumentType.DeepDOC, value: ParseDocumentType.DeepDOC },
    {
      label: ParseDocumentType.TCADPParser,
      value: ParseDocumentType.TCADPParser,
    },
  ];

  const tcadpOptionsShown = useMemo(() => {
    return (
      !isEmpty(parseMethod) && parseMethod === ParseDocumentType.TCADPParser
    );
  }, [parseMethod]);

  useEffect(() => {
    if (isEmpty(form.getValues(parseMethodName))) {
      form.setValue(parseMethodName, ParseDocumentType.DeepDOC, {
        shouldValidate: true,
        shouldDirty: true,
      });
    }
  }, [form, parseMethodName]);

  // Set default values for TCADP options when TCADP is selected
  useEffect(() => {
    if (tcadpOptionsShown) {
      const tableResultTypeName = buildFieldNameWithPrefix(
        'table_result_type',
        prefix,
      );
      const markdownImageResponseTypeName = buildFieldNameWithPrefix(
        'markdown_image_response_type',
        prefix,
      );

      if (isEmpty(form.getValues(tableResultTypeName))) {
        form.setValue(tableResultTypeName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
      if (isEmpty(form.getValues(markdownImageResponseTypeName))) {
        form.setValue(markdownImageResponseTypeName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
    }
  }, [tcadpOptionsShown, form, prefix]);

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={optionsWithoutLLM}
      ></ParserMethodFormField>
      {tcadpOptionsShown && (
        <>
          <RAGFlowFormItem
            name={buildFieldNameWithPrefix('table_result_type', prefix)}
            label={t('flow.tableResultType') || '表格返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={tableResultTypeOptions}
              ></SelectWithSearch>
            )}
          </RAGFlowFormItem>
          <RAGFlowFormItem
            name={buildFieldNameWithPrefix(
              'markdown_image_response_type',
              prefix,
            )}
            label={t('flow.markdownImageResponseType') || '图片返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={markdownImageResponseTypeOptions}
              ></SelectWithSearch>
            )}
          </RAGFlowFormItem>
        </>
      )}
    </>
  );
}
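PdfFormFields, PptFormFields and SpreadsheetFormFields all repeat the effect that backfills the two TCADP defaults. A hypothetical extraction of that shared piece as a hook, purely to show the common logic (not present in this change):

```ts
import { isEmpty } from 'lodash';
import { useEffect } from 'react';
import { useFormContext } from 'react-hook-form';
import { buildFieldNameWithPrefix } from './utils';

// Hypothetical hook: when TCADP options are visible, default both fields to '1'
// unless the user has already picked a value.
function useTcadpDefaults(shown: boolean, prefix?: string) {
  const form = useFormContext();

  useEffect(() => {
    if (!shown) return;
    for (const name of ['table_result_type', 'markdown_image_response_type']) {
      const fieldName = buildFieldNameWithPrefix(name, prefix);
      if (isEmpty(form.getValues(fieldName))) {
        form.setValue(fieldName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
    }
  }, [shown, form, prefix]);
}
```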
@@ -317,14 +317,18 @@ export const useGetComponentLabelByValue = (nodeId: string) => {
  return getLabel;
};

export function flatOptions(options: DefaultOptionType[]) {
  return options.reduce<DefaultOptionType[]>((pre, cur) => {
    return [...pre, ...cur.options];
  }, []);
}

export function useFlattenQueryVariableOptions(nodeId?: string) {
  const { getNode } = useGraphStore((state) => state);
  const nextOptions = useBuildQueryVariableOptions(getNode(nodeId));

  const flattenOptions = useMemo(() => {
    return nextOptions.reduce<DefaultOptionType[]>((pre, cur) => {
      return [...pre, ...cur.options];
    }, []);
    return flatOptions(nextOptions);
  }, [nextOptions]);

  return flattenOptions;
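flatOptions collapses grouped select options into one flat list, and the refactored useFlattenQueryVariableOptions now delegates to it instead of repeating the inline reduce. For example (the option data below is made up for illustration):

```ts
// Grouped options in the shape flatOptions expects: each group carries `options`.
const grouped = [
  { label: 'Begin', options: [{ label: 'query', value: 'begin@query' }] },
  { label: 'Parser', options: [{ label: 'text', value: 'parser@text' }] },
];

// flatOptions(grouped) returns:
// [
//   { label: 'query', value: 'begin@query' },
//   { label: 'text', value: 'parser@text' },
// ]
```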
@@ -12,9 +12,9 @@ import { ReactComponent as WenCaiIcon } from '@/assets/svg/wencai.svg';
import { ReactComponent as WikipediaIcon } from '@/assets/svg/wikipedia.svg';
import { ReactComponent as YahooFinanceIcon } from '@/assets/svg/yahoo-finance.svg';

import { IconFont } from '@/components/icon-font';
import { IconFontFill } from '@/components/icon-font';
import { cn } from '@/lib/utils';
import { Columns3, Equal, FileCode, HousePlus, Variable } from 'lucide-react';
import { FileCode, HousePlus } from 'lucide-react';
import { Operator } from './constant';

interface IProps {

@@ -37,6 +37,9 @@ export const OperatorIconMap = {
  [Operator.ExeSQL]: 'executesql-0',
  [Operator.Invoke]: 'httprequest-0',
  [Operator.Email]: 'sendemail-0',
  [Operator.ListOperations]: 'a-listoperations',
  [Operator.VariableAssigner]: 'a-ariableassigner',
  [Operator.VariableAggregator]: 'aggregator',
};

export const SVGIconMap = {

@@ -57,9 +60,6 @@
};
export const LucideIconMap = {
  [Operator.DataOperations]: FileCode,
  [Operator.ListOperations]: Columns3,
  [Operator.VariableAssigner]: Equal,
  [Operator.VariableAggregator]: Variable,
};

const Empty = () => {

@@ -86,7 +86,10 @@ const OperatorIcon = ({ name, className }: IProps) => {

  if (Icon) {
    return (
      <IconFont name={Icon} className={cn('size-5 ', className)}></IconFont>
      <IconFontFill
        name={Icon}
        className={cn('size-5 ', className)}
      ></IconFontFill>
    );
  }
@@ -214,6 +214,36 @@ function transformParserParams(params: ParserFormSchemaType) {
        parse_method: cur.parse_method,
        lang: cur.lang,
      };
      // Only include TCADP parameters if TCADP Parser is selected
      if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
        filteredSetup.table_result_type = cur.table_result_type;
        filteredSetup.markdown_image_response_type =
          cur.markdown_image_response_type;
      }
      break;
    case FileType.Spreadsheet:
      filteredSetup = {
        ...filteredSetup,
        parse_method: cur.parse_method,
      };
      // Only include TCADP parameters if TCADP Parser is selected
      if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
        filteredSetup.table_result_type = cur.table_result_type;
        filteredSetup.markdown_image_response_type =
          cur.markdown_image_response_type;
      }
      break;
    case FileType.PowerPoint:
      filteredSetup = {
        ...filteredSetup,
        parse_method: cur.parse_method,
      };
      // Only include TCADP parameters if TCADP Parser is selected
      if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
        filteredSetup.table_result_type = cur.table_result_type;
        filteredSetup.markdown_image_response_type =
          cur.markdown_image_response_type;
      }
      break;
    case FileType.Image:
      filteredSetup = {
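The same guard now repeats for PDF, Spreadsheet and PowerPoint: the two TCADP-only keys are copied onto the outgoing setup only when the selected parse method is the TCADP Parser. Condensed into a standalone helper purely for illustration (not part of the diff):

```ts
// Illustrative helper mirroring the repeated branch above.
function withTcadpParams(
  setup: Record<string, unknown>,
  cur: {
    parse_method?: string;
    table_result_type?: string;
    markdown_image_response_type?: string;
  },
) {
  if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
    return {
      ...setup,
      table_result_type: cur.table_result_type,
      markdown_image_response_type: cur.markdown_image_response_type,
    };
  }
  return setup;
}
```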
0    web/src/pages/data-flow/constant.tsx  Normal file
0    web/src/pages/data-flow/form/parser-form/index.tsx  Normal file
40   web/src/pages/data-flow/form/parser-form/ppt-form-fields.tsx  Normal file
@@ -0,0 +1,40 @@
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
import { isEmpty } from 'lodash';
import { useEffect } from 'react';
import { useFormContext } from 'react-hook-form';
import { ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
import { buildFieldNameWithPrefix } from './utils';

export function PptFormFields({ prefix }: CommonProps) {
  const form = useFormContext();

  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);

  // PPT only supports DeepDOC and TCADPParser
  const optionsWithoutLLM = [
    { label: ParseDocumentType.DeepDOC, value: ParseDocumentType.DeepDOC },
    {
      label: ParseDocumentType.TCADPParser,
      value: ParseDocumentType.TCADPParser,
    },
  ];

  useEffect(() => {
    if (isEmpty(form.getValues(parseMethodName))) {
      form.setValue(parseMethodName, ParseDocumentType.DeepDOC, {
        shouldValidate: true,
        shouldDirty: true,
      });
    }
  }, [form, parseMethodName]);

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={optionsWithoutLLM}
      ></ParserMethodFormField>
    </>
  );
}
40   web/src/pages/data-flow/form/parser-form/spreadsheet-form-fields.tsx  Normal file

@@ -0,0 +1,40 @@
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
import { isEmpty } from 'lodash';
import { useEffect } from 'react';
import { useFormContext } from 'react-hook-form';
import { ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
import { buildFieldNameWithPrefix } from './utils';

export function SpreadsheetFormFields({ prefix }: CommonProps) {
  const form = useFormContext();

  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);

  // Spreadsheet only supports DeepDOC and TCADPParser
  const optionsWithoutLLM = [
    { label: ParseDocumentType.DeepDOC, value: ParseDocumentType.DeepDOC },
    {
      label: ParseDocumentType.TCADPParser,
      value: ParseDocumentType.TCADPParser,
    },
  ];

  useEffect(() => {
    if (isEmpty(form.getValues(parseMethodName))) {
      form.setValue(parseMethodName, ParseDocumentType.DeepDOC, {
        shouldValidate: true,
        shouldDirty: true,
      });
    }
  }, [form, parseMethodName]);

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={optionsWithoutLLM}
      ></ParserMethodFormField>
    </>
  );
}
0    web/src/pages/data-flow/utils.ts  Normal file