Merge branch 'main' of github.com:infiniflow/ragflow into feature/1117
This commit is contained in:
commit
906586079c
55 changed files with 7025 additions and 4046 deletions
|
|
@ -192,7 +192,8 @@ releases! 🌟
|
||||||
```bash
|
```bash
|
||||||
$ cd ragflow/docker
|
$ cd ragflow/docker
|
||||||
|
|
||||||
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases), e.g.: git checkout v0.22.1
|
# git checkout v0.22.1
|
||||||
|
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases)
|
||||||
# This step ensures the **entrypoint.sh** file in the code matches the Docker image version.
|
# This step ensures the **entrypoint.sh** file in the code matches the Docker image version.
|
||||||
|
|
||||||
# Use CPU for DeepDoc tasks:
|
# Use CPU for DeepDoc tasks:
|
||||||
|
|
|
||||||
|
|
@ -192,7 +192,8 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
|
||||||
```bash
|
```bash
|
||||||
$ cd ragflow/docker
|
$ cd ragflow/docker
|
||||||
|
|
||||||
# Opsional: gunakan tag stabil (lihat releases: https://github.com/infiniflow/ragflow/releases), contoh: git checkout v0.22.1
|
# git checkout v0.22.1
|
||||||
|
# Opsional: gunakan tag stabil (lihat releases: https://github.com/infiniflow/ragflow/releases)
|
||||||
# This step ensures the **entrypoint.sh** file in the code matches the Docker image version.
|
# This step ensures the **entrypoint.sh** file in the code matches the Docker image version.
|
||||||
|
|
||||||
# Use CPU for DeepDoc tasks:
|
# Use CPU for DeepDoc tasks:
|
||||||
|
|
|
||||||
|
|
@ -172,7 +172,8 @@
|
||||||
```bash
|
```bash
|
||||||
$ cd ragflow/docker
|
$ cd ragflow/docker
|
||||||
|
|
||||||
# 任意: 安定版タグを利用 (一覧: https://github.com/infiniflow/ragflow/releases) 例: git checkout v0.22.1
|
# git checkout v0.22.1
|
||||||
|
# 任意: 安定版タグを利用 (一覧: https://github.com/infiniflow/ragflow/releases)
|
||||||
# この手順は、コード内の entrypoint.sh ファイルが Docker イメージのバージョンと一致していることを確認します。
|
# この手順は、コード内の entrypoint.sh ファイルが Docker イメージのバージョンと一致していることを確認します。
|
||||||
|
|
||||||
# Use CPU for DeepDoc tasks:
|
# Use CPU for DeepDoc tasks:
|
||||||
|
|
|
||||||
|
|
@ -174,7 +174,8 @@
|
||||||
```bash
|
```bash
|
||||||
$ cd ragflow/docker
|
$ cd ragflow/docker
|
||||||
|
|
||||||
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases), e.g.: git checkout v0.22.1
|
# git checkout v0.22.1
|
||||||
|
# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases)
|
||||||
# 이 단계는 코드의 entrypoint.sh 파일이 Docker 이미지 버전과 일치하도록 보장합니다.
|
# 이 단계는 코드의 entrypoint.sh 파일이 Docker 이미지 버전과 일치하도록 보장합니다.
|
||||||
|
|
||||||
# Use CPU for DeepDoc tasks:
|
# Use CPU for DeepDoc tasks:
|
||||||
|
|
|
||||||
|
|
@ -192,7 +192,8 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
|
||||||
```bash
|
```bash
|
||||||
$ cd ragflow/docker
|
$ cd ragflow/docker
|
||||||
|
|
||||||
# Opcional: use uma tag estável (veja releases: https://github.com/infiniflow/ragflow/releases), ex.: git checkout v0.22.1
|
# git checkout v0.22.1
|
||||||
|
# Opcional: use uma tag estável (veja releases: https://github.com/infiniflow/ragflow/releases)
|
||||||
# Esta etapa garante que o arquivo entrypoint.sh no código corresponda à versão da imagem do Docker.
|
# Esta etapa garante que o arquivo entrypoint.sh no código corresponda à versão da imagem do Docker.
|
||||||
|
|
||||||
# Use CPU for DeepDoc tasks:
|
# Use CPU for DeepDoc tasks:
|
||||||
|
|
|
||||||
|
|
@ -191,7 +191,8 @@
|
||||||
```bash
|
```bash
|
||||||
$ cd ragflow/docker
|
$ cd ragflow/docker
|
||||||
|
|
||||||
# 可選:使用穩定版標籤(查看發佈:https://github.com/infiniflow/ragflow/releases),例:git checkout v0.22.1
|
# git checkout v0.22.1
|
||||||
|
# 可選:使用穩定版標籤(查看發佈:https://github.com/infiniflow/ragflow/releases)
|
||||||
# 此步驟確保程式碼中的 entrypoint.sh 檔案與 Docker 映像版本一致。
|
# 此步驟確保程式碼中的 entrypoint.sh 檔案與 Docker 映像版本一致。
|
||||||
|
|
||||||
# Use CPU for DeepDoc tasks:
|
# Use CPU for DeepDoc tasks:
|
||||||
|
|
|
||||||
|
|
@ -192,7 +192,8 @@
|
||||||
```bash
|
```bash
|
||||||
$ cd ragflow/docker
|
$ cd ragflow/docker
|
||||||
|
|
||||||
# 可选:使用稳定版本标签(查看发布:https://github.com/infiniflow/ragflow/releases),例如:git checkout v0.22.1
|
# git checkout v0.22.1
|
||||||
|
# 可选:使用稳定版本标签(查看发布:https://github.com/infiniflow/ragflow/releases)
|
||||||
# 这一步确保代码中的 entrypoint.sh 文件与 Docker 镜像的版本保持一致。
|
# 这一步确保代码中的 entrypoint.sh 文件与 Docker 镜像的版本保持一致。
|
||||||
|
|
||||||
# Use CPU for DeepDoc tasks:
|
# Use CPU for DeepDoc tasks:
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ from common.constants import ActiveEnum, StatusEnum
|
||||||
from api.utils.crypt import decrypt
|
from api.utils.crypt import decrypt
|
||||||
from common.misc_utils import get_uuid
|
from common.misc_utils import get_uuid
|
||||||
from common.time_utils import current_timestamp, datetime_format, get_format_time
|
from common.time_utils import current_timestamp, datetime_format, get_format_time
|
||||||
from common.connection_utils import construct_response
|
from common.connection_utils import sync_construct_response
|
||||||
from common import settings
|
from common import settings
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -130,7 +130,7 @@ def login_admin(email: str, password: str):
|
||||||
user.last_login_time = get_format_time()
|
user.last_login_time = get_format_time()
|
||||||
user.save()
|
user.save()
|
||||||
msg = "Welcome back!"
|
msg = "Welcome back!"
|
||||||
return construct_response(data=resp, auth=user.get_id(), message=msg)
|
return sync_construct_response(data=resp, auth=user.get_id(), message=msg)
|
||||||
|
|
||||||
|
|
||||||
def check_admin(username: str, password: str):
|
def check_admin(username: str, password: str):
|
||||||
|
|
|
||||||
|
|
@ -886,6 +886,7 @@ async def check_embedding():
|
||||||
|
|
||||||
try:
|
try:
|
||||||
v, _ = emb_mdl.encode([title, txt_in])
|
v, _ = emb_mdl.encode([title, txt_in])
|
||||||
|
assert len(v[1]) == len(ck["vector"]), f"The dimension ({len(v[1])}) of given embedding model is different from the original ({len(ck['vector'])})"
|
||||||
sim_content = _cos_sim(v[1], ck["vector"])
|
sim_content = _cos_sim(v[1], ck["vector"])
|
||||||
title_w = 0.1
|
title_w = 0.1
|
||||||
qv_mix = title_w * v[0] + (1 - title_w) * v[1]
|
qv_mix = title_w * v[0] + (1 - title_w) * v[1]
|
||||||
|
|
@ -895,8 +896,8 @@ async def check_embedding():
|
||||||
if sim_mix > sim:
|
if sim_mix > sim:
|
||||||
sim = sim_mix
|
sim = sim_mix
|
||||||
mode = "title+content"
|
mode = "title+content"
|
||||||
except Exception:
|
except Exception as e:
|
||||||
return get_error_data_result(message="embedding failure")
|
return get_error_data_result(message=f"Embedding failure. {e}")
|
||||||
|
|
||||||
eff_sims.append(sim)
|
eff_sims.append(sim)
|
||||||
results.append({
|
results.append({
|
||||||
|
|
|
||||||
|
|
@ -223,6 +223,10 @@ def completion(tenant_id, agent_id, session_id=None, **kwargs):
|
||||||
ans["session_id"] = session_id
|
ans["session_id"] = session_id
|
||||||
if ans["event"] == "message":
|
if ans["event"] == "message":
|
||||||
txt += ans["data"]["content"]
|
txt += ans["data"]["content"]
|
||||||
|
if ans["data"].get("start_to_think", False):
|
||||||
|
txt += "<think>"
|
||||||
|
elif ans["data"].get("end_to_think", False):
|
||||||
|
txt += "</think>"
|
||||||
yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"
|
yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"
|
||||||
|
|
||||||
conv.message.append({"role": "assistant", "content": txt, "created_at": time.time(), "id": message_id})
|
conv.message.append({"role": "assistant", "content": txt, "created_at": time.time(), "id": message_id})
|
||||||
|
|
|
||||||
|
|
@ -120,3 +120,23 @@ async def construct_response(code=RetCode.SUCCESS, message="success", data=None,
|
||||||
response.headers["Access-Control-Allow-Headers"] = "*"
|
response.headers["Access-Control-Allow-Headers"] = "*"
|
||||||
response.headers["Access-Control-Expose-Headers"] = "Authorization"
|
response.headers["Access-Control-Expose-Headers"] = "Authorization"
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def sync_construct_response(code=RetCode.SUCCESS, message="success", data=None, auth=None):
|
||||||
|
import flask
|
||||||
|
result_dict = {"code": code, "message": message, "data": data}
|
||||||
|
response_dict = {}
|
||||||
|
for key, value in result_dict.items():
|
||||||
|
if value is None and key != "code":
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
response_dict[key] = value
|
||||||
|
response = flask.make_response(flask.jsonify(response_dict))
|
||||||
|
if auth:
|
||||||
|
response.headers["Authorization"] = auth
|
||||||
|
response.headers["Access-Control-Allow-Origin"] = "*"
|
||||||
|
response.headers["Access-Control-Allow-Method"] = "*"
|
||||||
|
response.headers["Access-Control-Allow-Headers"] = "*"
|
||||||
|
response.headers["Access-Control-Allow-Headers"] = "*"
|
||||||
|
response.headers["Access-Control-Expose-Headers"] = "Authorization"
|
||||||
|
return response
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ from common.constants import SVR_QUEUE_NAME, Storage
|
||||||
import rag.utils
|
import rag.utils
|
||||||
import rag.utils.es_conn
|
import rag.utils.es_conn
|
||||||
import rag.utils.infinity_conn
|
import rag.utils.infinity_conn
|
||||||
|
import rag.utils.ob_conn
|
||||||
import rag.utils.opensearch_conn
|
import rag.utils.opensearch_conn
|
||||||
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
|
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
|
||||||
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
|
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
|
||||||
|
|
@ -103,6 +104,7 @@ INFINITY = {}
|
||||||
AZURE = {}
|
AZURE = {}
|
||||||
S3 = {}
|
S3 = {}
|
||||||
MINIO = {}
|
MINIO = {}
|
||||||
|
OB = {}
|
||||||
OSS = {}
|
OSS = {}
|
||||||
OS = {}
|
OS = {}
|
||||||
|
|
||||||
|
|
@ -227,7 +229,7 @@ def init_settings():
|
||||||
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
|
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
|
||||||
OAUTH_CONFIG = get_base_config("oauth", {})
|
OAUTH_CONFIG = get_base_config("oauth", {})
|
||||||
|
|
||||||
global DOC_ENGINE, docStoreConn, ES, OS, INFINITY
|
global DOC_ENGINE, docStoreConn, ES, OB, OS, INFINITY
|
||||||
DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
|
DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
|
||||||
# DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
|
# DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
|
||||||
lower_case_doc_engine = DOC_ENGINE.lower()
|
lower_case_doc_engine = DOC_ENGINE.lower()
|
||||||
|
|
@ -240,6 +242,9 @@ def init_settings():
|
||||||
elif lower_case_doc_engine == "opensearch":
|
elif lower_case_doc_engine == "opensearch":
|
||||||
OS = get_base_config("os", {})
|
OS = get_base_config("os", {})
|
||||||
docStoreConn = rag.utils.opensearch_conn.OSConnection()
|
docStoreConn = rag.utils.opensearch_conn.OSConnection()
|
||||||
|
elif lower_case_doc_engine == "oceanbase":
|
||||||
|
OB = get_base_config("oceanbase", {})
|
||||||
|
docStoreConn = rag.utils.ob_conn.OBConnection()
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
|
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,12 @@ def num_tokens_from_string(string: str) -> int:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def total_token_count_from_response(resp):
|
def total_token_count_from_response(resp):
|
||||||
|
"""
|
||||||
|
Extract token count from LLM response in various formats.
|
||||||
|
|
||||||
|
Handles None responses and different response structures from various LLM providers.
|
||||||
|
Returns 0 if token count cannot be determined.
|
||||||
|
"""
|
||||||
if resp is None:
|
if resp is None:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
@ -50,19 +56,19 @@ def total_token_count_from_response(resp):
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if 'usage' in resp and 'total_tokens' in resp['usage']:
|
if isinstance(resp, dict) and 'usage' in resp and 'total_tokens' in resp['usage']:
|
||||||
try:
|
try:
|
||||||
return resp["usage"]["total_tokens"]
|
return resp["usage"]["total_tokens"]
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if 'usage' in resp and 'input_tokens' in resp['usage'] and 'output_tokens' in resp['usage']:
|
if isinstance(resp, dict) and 'usage' in resp and 'input_tokens' in resp['usage'] and 'output_tokens' in resp['usage']:
|
||||||
try:
|
try:
|
||||||
return resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]
|
return resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if 'meta' in resp and 'tokens' in resp['meta'] and 'input_tokens' in resp['meta']['tokens'] and 'output_tokens' in resp['meta']['tokens']:
|
if isinstance(resp, dict) and 'meta' in resp and 'tokens' in resp['meta'] and 'input_tokens' in resp['meta']['tokens'] and 'output_tokens' in resp['meta']['tokens']:
|
||||||
try:
|
try:
|
||||||
return resp["meta"]["tokens"]["input_tokens"] + resp["meta"]["tokens"]["output_tokens"]
|
return resp["meta"]["tokens"]["input_tokens"] + resp["meta"]["tokens"]["output_tokens"]
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -4848,7 +4848,7 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "JieKou.AI",
|
"name": "Jiekou.AI",
|
||||||
"logo": "",
|
"logo": "",
|
||||||
"tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK",
|
"tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK",
|
||||||
"status": "1",
|
"status": "1",
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,14 @@ os:
|
||||||
infinity:
|
infinity:
|
||||||
uri: 'localhost:23817'
|
uri: 'localhost:23817'
|
||||||
db_name: 'default_db'
|
db_name: 'default_db'
|
||||||
|
oceanbase:
|
||||||
|
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
|
||||||
|
config:
|
||||||
|
db_name: 'test'
|
||||||
|
user: 'root@ragflow'
|
||||||
|
password: 'infini_rag_flow'
|
||||||
|
host: 'localhost'
|
||||||
|
port: 2881
|
||||||
redis:
|
redis:
|
||||||
db: 1
|
db: 1
|
||||||
password: 'infini_rag_flow'
|
password: 'infini_rag_flow'
|
||||||
|
|
@ -139,5 +147,3 @@ user_default_llm:
|
||||||
# secret_id: 'tencent_secret_id'
|
# secret_id: 'tencent_secret_id'
|
||||||
# secret_key: 'tencent_secret_key'
|
# secret_key: 'tencent_secret_key'
|
||||||
# region: 'tencent_region'
|
# region: 'tencent_region'
|
||||||
# table_result_type: '1'
|
|
||||||
# markdown_image_response_type: '1'
|
|
||||||
|
|
|
||||||
|
|
@ -192,12 +192,16 @@ class TencentCloudAPIClient:
|
||||||
|
|
||||||
|
|
||||||
class TCADPParser(RAGFlowPdfParser):
|
class TCADPParser(RAGFlowPdfParser):
|
||||||
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou"):
|
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou",
|
||||||
|
table_result_type: str = None, markdown_image_response_type: str = None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
# First initialize logger
|
# First initialize logger
|
||||||
self.logger = logging.getLogger(self.__class__.__name__)
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
|
|
||||||
|
# Log received parameters
|
||||||
|
self.logger.info(f"[TCADP] Initializing with parameters - table_result_type: {table_result_type}, markdown_image_response_type: {markdown_image_response_type}")
|
||||||
|
|
||||||
# Priority: read configuration from RAGFlow configuration system (service_conf.yaml)
|
# Priority: read configuration from RAGFlow configuration system (service_conf.yaml)
|
||||||
try:
|
try:
|
||||||
tcadp_parser = get_base_config("tcadp_config", {})
|
tcadp_parser = get_base_config("tcadp_config", {})
|
||||||
|
|
@ -205,14 +209,30 @@ class TCADPParser(RAGFlowPdfParser):
|
||||||
self.secret_id = secret_id or tcadp_parser.get("secret_id")
|
self.secret_id = secret_id or tcadp_parser.get("secret_id")
|
||||||
self.secret_key = secret_key or tcadp_parser.get("secret_key")
|
self.secret_key = secret_key or tcadp_parser.get("secret_key")
|
||||||
self.region = region or tcadp_parser.get("region", "ap-guangzhou")
|
self.region = region or tcadp_parser.get("region", "ap-guangzhou")
|
||||||
self.table_result_type = tcadp_parser.get("table_result_type", "1")
|
# Set table_result_type and markdown_image_response_type from config or parameters
|
||||||
self.markdown_image_response_type = tcadp_parser.get("markdown_image_response_type", "1")
|
self.table_result_type = table_result_type if table_result_type is not None else tcadp_parser.get("table_result_type", "1")
|
||||||
self.logger.info("[TCADP] Configuration read from service_conf.yaml")
|
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else tcadp_parser.get("markdown_image_response_type", "1")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.logger.error("[TCADP] Please configure tcadp_config in service_conf.yaml first")
|
self.logger.error("[TCADP] Please configure tcadp_config in service_conf.yaml first")
|
||||||
|
# If config file is empty, use provided parameters or defaults
|
||||||
|
self.secret_id = secret_id
|
||||||
|
self.secret_key = secret_key
|
||||||
|
self.region = region or "ap-guangzhou"
|
||||||
|
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||||
|
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
self.logger.info("[TCADP] Configuration module import failed")
|
self.logger.info("[TCADP] Configuration module import failed")
|
||||||
|
# If config file is not available, use provided parameters or defaults
|
||||||
|
self.secret_id = secret_id
|
||||||
|
self.secret_key = secret_key
|
||||||
|
self.region = region or "ap-guangzhou"
|
||||||
|
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||||
|
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||||
|
|
||||||
|
# Log final values
|
||||||
|
self.logger.info(f"[TCADP] Final values - table_result_type: {self.table_result_type}, markdown_image_response_type: {self.markdown_image_response_type}")
|
||||||
|
|
||||||
if not self.secret_id or not self.secret_key:
|
if not self.secret_id or not self.secret_key:
|
||||||
raise ValueError("[TCADP] Please set Tencent Cloud API keys, configure tcadp_config in service_conf.yaml")
|
raise ValueError("[TCADP] Please set Tencent Cloud API keys, configure tcadp_config in service_conf.yaml")
|
||||||
|
|
@ -400,6 +420,8 @@ class TCADPParser(RAGFlowPdfParser):
|
||||||
"TableResultType": self.table_result_type,
|
"TableResultType": self.table_result_type,
|
||||||
"MarkdownImageResponseType": self.markdown_image_response_type
|
"MarkdownImageResponseType": self.markdown_image_response_type
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.logger.info(f"[TCADP] API request config - TableResultType: {self.table_result_type}, MarkdownImageResponseType: {self.markdown_image_response_type}")
|
||||||
|
|
||||||
result = client.reconstruct_document_sse(
|
result = client.reconstruct_document_sse(
|
||||||
file_type=file_type,
|
file_type=file_type,
|
||||||
|
|
|
||||||
22
docker/.env
22
docker/.env
|
|
@ -7,6 +7,7 @@
|
||||||
# Available options:
|
# Available options:
|
||||||
# - `elasticsearch` (default)
|
# - `elasticsearch` (default)
|
||||||
# - `infinity` (https://github.com/infiniflow/infinity)
|
# - `infinity` (https://github.com/infiniflow/infinity)
|
||||||
|
# - `oceanbase` (https://github.com/oceanbase/oceanbase)
|
||||||
# - `opensearch` (https://github.com/opensearch-project/OpenSearch)
|
# - `opensearch` (https://github.com/opensearch-project/OpenSearch)
|
||||||
DOC_ENGINE=${DOC_ENGINE:-elasticsearch}
|
DOC_ENGINE=${DOC_ENGINE:-elasticsearch}
|
||||||
|
|
||||||
|
|
@ -62,6 +63,27 @@ INFINITY_THRIFT_PORT=23817
|
||||||
INFINITY_HTTP_PORT=23820
|
INFINITY_HTTP_PORT=23820
|
||||||
INFINITY_PSQL_PORT=5432
|
INFINITY_PSQL_PORT=5432
|
||||||
|
|
||||||
|
# The hostname where the OceanBase service is exposed
|
||||||
|
OCEANBASE_HOST=oceanbase
|
||||||
|
# The port used to expose the OceanBase service
|
||||||
|
OCEANBASE_PORT=2881
|
||||||
|
# The username for OceanBase
|
||||||
|
OCEANBASE_USER=root@ragflow
|
||||||
|
# The password for OceanBase
|
||||||
|
OCEANBASE_PASSWORD=infini_rag_flow
|
||||||
|
# The doc database of the OceanBase service to use
|
||||||
|
OCEANBASE_DOC_DBNAME=ragflow_doc
|
||||||
|
|
||||||
|
# OceanBase container configuration
|
||||||
|
OB_CLUSTER_NAME=${OB_CLUSTER_NAME:-ragflow}
|
||||||
|
OB_TENANT_NAME=${OB_TENANT_NAME:-ragflow}
|
||||||
|
OB_SYS_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
|
||||||
|
OB_TENANT_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
|
||||||
|
OB_MEMORY_LIMIT=${OB_MEMORY_LIMIT:-10G}
|
||||||
|
OB_SYSTEM_MEMORY=${OB_SYSTEM_MEMORY:-2G}
|
||||||
|
OB_DATAFILE_SIZE=${OB_DATAFILE_SIZE:-20G}
|
||||||
|
OB_LOG_DISK_SIZE=${OB_LOG_DISK_SIZE:-20G}
|
||||||
|
|
||||||
# The password for MySQL.
|
# The password for MySQL.
|
||||||
MYSQL_PASSWORD=infini_rag_flow
|
MYSQL_PASSWORD=infini_rag_flow
|
||||||
# The hostname where the MySQL service is exposed
|
# The hostname where the MySQL service is exposed
|
||||||
|
|
|
||||||
|
|
@ -138,6 +138,15 @@ The [.env](./.env) file contains important environment variables for Docker.
|
||||||
- `password`: The password for MinIO.
|
- `password`: The password for MinIO.
|
||||||
- `host`: The MinIO serving IP *and* port inside the Docker container. Defaults to `minio:9000`.
|
- `host`: The MinIO serving IP *and* port inside the Docker container. Defaults to `minio:9000`.
|
||||||
|
|
||||||
|
- `oceanbase`
|
||||||
|
- `scheme`: The connection scheme. Set to `mysql` to reuse the MySQL configuration; any other value uses the `config` section below.
|
||||||
|
- `config`:
|
||||||
|
- `db_name`: The OceanBase database name.
|
||||||
|
- `user`: The username for OceanBase.
|
||||||
|
- `password`: The password for OceanBase.
|
||||||
|
- `host`: The hostname of the OceanBase service.
|
||||||
|
- `port`: The port of OceanBase.
|
||||||
|
|
||||||
- `oss`
|
- `oss`
|
||||||
- `access_key`: The access key ID used to authenticate requests to the OSS service.
|
- `access_key`: The access key ID used to authenticate requests to the OSS service.
|
||||||
- `secret_key`: The secret access key used to authenticate requests to the OSS service.
|
- `secret_key`: The secret access key used to authenticate requests to the OSS service.
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,7 @@ services:
|
||||||
infinity:
|
infinity:
|
||||||
profiles:
|
profiles:
|
||||||
- infinity
|
- infinity
|
||||||
image: infiniflow/infinity:v0.6.5
|
image: infiniflow/infinity:v0.6.6
|
||||||
volumes:
|
volumes:
|
||||||
- infinity_data:/var/infinity
|
- infinity_data:/var/infinity
|
||||||
- ./infinity_conf.toml:/infinity_conf.toml
|
- ./infinity_conf.toml:/infinity_conf.toml
|
||||||
|
|
@ -96,6 +96,31 @@ services:
|
||||||
retries: 120
|
retries: 120
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
|
|
||||||
|
oceanbase:
|
||||||
|
profiles:
|
||||||
|
- oceanbase
|
||||||
|
image: oceanbase/oceanbase-ce:4.4.1.0-100000032025101610
|
||||||
|
volumes:
|
||||||
|
- ./oceanbase/data:/root/ob
|
||||||
|
- ./oceanbase/conf:/root/.obd/cluster
|
||||||
|
- ./oceanbase/init.d:/root/boot/init.d
|
||||||
|
ports:
|
||||||
|
- ${OCEANBASE_PORT:-2881}:2881
|
||||||
|
env_file: .env
|
||||||
|
environment:
|
||||||
|
- MODE=normal
|
||||||
|
- OB_SERVER_IP=127.0.0.1
|
||||||
|
mem_limit: ${MEM_LIMIT}
|
||||||
|
healthcheck:
|
||||||
|
test: [ 'CMD-SHELL', 'obclient -h127.0.0.1 -P2881 -uroot@${OB_TENANT_NAME:-ragflow} -p${OB_TENANT_PASSWORD:-infini_rag_flow} -e "CREATE DATABASE IF NOT EXISTS ${OCEANBASE_DOC_DBNAME:-ragflow_doc};"' ]
|
||||||
|
interval: 10s
|
||||||
|
retries: 30
|
||||||
|
start_period: 30s
|
||||||
|
timeout: 10s
|
||||||
|
networks:
|
||||||
|
- ragflow
|
||||||
|
restart: on-failure
|
||||||
|
|
||||||
sandbox-executor-manager:
|
sandbox-executor-manager:
|
||||||
profiles:
|
profiles:
|
||||||
- sandbox
|
- sandbox
|
||||||
|
|
@ -154,7 +179,7 @@ services:
|
||||||
|
|
||||||
minio:
|
minio:
|
||||||
image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
|
image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
|
||||||
command: server --console-address ":9001" /data
|
command: ["server", "--console-address", ":9001", "/data"]
|
||||||
ports:
|
ports:
|
||||||
- ${MINIO_PORT}:9000
|
- ${MINIO_PORT}:9000
|
||||||
- ${MINIO_CONSOLE_PORT}:9001
|
- ${MINIO_CONSOLE_PORT}:9001
|
||||||
|
|
@ -176,7 +201,7 @@ services:
|
||||||
redis:
|
redis:
|
||||||
# swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/valkey/valkey:8
|
# swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/valkey/valkey:8
|
||||||
image: valkey/valkey:8
|
image: valkey/valkey:8
|
||||||
command: redis-server --requirepass ${REDIS_PASSWORD} --maxmemory 128mb --maxmemory-policy allkeys-lru
|
command: ["redis-server", "--requirepass", "${REDIS_PASSWORD}", "--maxmemory", "128mb", "--maxmemory-policy", "allkeys-lru"]
|
||||||
env_file: .env
|
env_file: .env
|
||||||
ports:
|
ports:
|
||||||
- ${REDIS_PORT}:6379
|
- ${REDIS_PORT}:6379
|
||||||
|
|
@ -256,6 +281,8 @@ volumes:
|
||||||
driver: local
|
driver: local
|
||||||
infinity_data:
|
infinity_data:
|
||||||
driver: local
|
driver: local
|
||||||
|
ob_data:
|
||||||
|
driver: local
|
||||||
mysql_data:
|
mysql_data:
|
||||||
driver: local
|
driver: local
|
||||||
minio_data:
|
minio_data:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
[general]
|
[general]
|
||||||
version = "0.6.5"
|
version = "0.6.6"
|
||||||
time_zone = "utc-8"
|
time_zone = "utc-8"
|
||||||
|
|
||||||
[network]
|
[network]
|
||||||
|
|
|
||||||
1
docker/oceanbase/init.d/vec_memory.sql
Normal file
1
docker/oceanbase/init.d/vec_memory.sql
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30;
|
||||||
|
|
@ -28,6 +28,14 @@ os:
|
||||||
infinity:
|
infinity:
|
||||||
uri: '${INFINITY_HOST:-infinity}:23817'
|
uri: '${INFINITY_HOST:-infinity}:23817'
|
||||||
db_name: 'default_db'
|
db_name: 'default_db'
|
||||||
|
oceanbase:
|
||||||
|
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
|
||||||
|
config:
|
||||||
|
db_name: '${OCEANBASE_DOC_DBNAME:-test}'
|
||||||
|
user: '${OCEANBASE_USER:-root@ragflow}'
|
||||||
|
password: '${OCEANBASE_PASSWORD:-infini_rag_flow}'
|
||||||
|
host: '${OCEANBASE_HOST:-oceanbase}'
|
||||||
|
port: ${OCEANBASE_PORT:-2881}
|
||||||
redis:
|
redis:
|
||||||
db: 1
|
db: 1
|
||||||
password: '${REDIS_PASSWORD:-infini_rag_flow}'
|
password: '${REDIS_PASSWORD:-infini_rag_flow}'
|
||||||
|
|
@ -142,5 +150,3 @@ user_default_llm:
|
||||||
# secret_id: '${TENCENT_SECRET_ID}'
|
# secret_id: '${TENCENT_SECRET_ID}'
|
||||||
# secret_key: '${TENCENT_SECRET_KEY}'
|
# secret_key: '${TENCENT_SECRET_KEY}'
|
||||||
# region: '${TENCENT_REGION}'
|
# region: '${TENCENT_REGION}'
|
||||||
# table_result_type: '1'
|
|
||||||
# markdown_image_response_type: '1'
|
|
||||||
|
|
|
||||||
|
|
@ -7,25 +7,29 @@ slug: /release_notes
|
||||||
|
|
||||||
Key features, improvements and bug fixes in the latest releases.
|
Key features, improvements and bug fixes in the latest releases.
|
||||||
|
|
||||||
|
## v0.22.1
|
||||||
|
|
||||||
Released on November 19, 2025.
|
Released on November 19, 2025.
|
||||||
|
|
||||||
### Bug Fixes
|
|
||||||
|
|
||||||
- **Knowledge Base Embedding Models**: Fixed an issue where knowledge base embedding models became unavailable since v0.22.0.
|
|
||||||
- **Document Parsing**: Fixing image merging issues.
|
|
||||||
- **Chat History**: Fixed a bug where images and text were not correctly displayed together in historical chat records.
|
|
||||||
|
|
||||||
### Improvements
|
### Improvements
|
||||||
|
|
||||||
- **Agent**:
|
- Agent:
|
||||||
- Added support for exporting Agent outputs in Word formats.
|
- Supports exporting Agent outputs in Word or Markdown formats.
|
||||||
- Introduced new list operations and enhanced the **Variable Aggregator** component capabilities.
|
- Adds a **List operations** component.
|
||||||
- **Data Sources**:
|
- Adds a **Variable aggregator** component.
|
||||||
- Expanded data source support to include S3-compatible storage services.
|
- Data sources:
|
||||||
- Added new integration support for JIRA.
|
- Supports S3-compatible data sources, e.g., MinIO.
|
||||||
- **User Profile**: Optimized and beautified the layout of the personal center interface.
|
- Adds data synchronization with JIRA.
|
||||||
|
- Continues the redesign of the **Profile** page layouts.
|
||||||
|
- Upgrades the Flask web framework from synchronous to asynchronous, increasing concurrency and preventing blocking issues caused when requesting upstream LLM services.
|
||||||
|
|
||||||
### Support new models
|
### Fixed issues
|
||||||
|
|
||||||
|
- A v0.22.0 issue: Users could not parse uploaded files or switch the embedding model in a dataset containing parsed files using a built-in model from a `-full` RAGFlow edition.
|
||||||
|
- Images were incorrectly concatenated in Word documents. [#11310](https://github.com/infiniflow/ragflow/pull/11310)
|
||||||
|
- Mixed images and text were not correctly displayed in the chat history.
|
||||||
|
|
||||||
|
### Newly supported models
|
||||||
|
|
||||||
- Gemini 3 Pro Preview
|
- Gemini 3 Pro Preview
|
||||||
|
|
||||||
|
|
@ -99,7 +103,7 @@ Released on October 15, 2025.
|
||||||
- Redesigns RAGFlow's Login and Registration pages.
|
- Redesigns RAGFlow's Login and Registration pages.
|
||||||
- Upgrades RAGFlow's document engine Infinity to v0.6.0.
|
- Upgrades RAGFlow's document engine Infinity to v0.6.0.
|
||||||
|
|
||||||
### Support new models
|
### Newly supported models
|
||||||
|
|
||||||
- Tongyi Qwen 3 series
|
- Tongyi Qwen 3 series
|
||||||
- Claude Sonnet 4.5
|
- Claude Sonnet 4.5
|
||||||
|
|
@ -122,7 +126,7 @@ Released on September 10, 2025.
|
||||||
- **Execute SQL** component enhanced: Replaces the original variable reference component with a text input field, allowing users to write free-form SQL queries and reference variables. See [here](./guides/agent/agent_component_reference/execute_sql.md).
|
- **Execute SQL** component enhanced: Replaces the original variable reference component with a text input field, allowing users to write free-form SQL queries and reference variables. See [here](./guides/agent/agent_component_reference/execute_sql.md).
|
||||||
- Chat: Re-enables **Reasoning** and **Cross-language search**.
|
- Chat: Re-enables **Reasoning** and **Cross-language search**.
|
||||||
|
|
||||||
### Support new models
|
### Newly supported models
|
||||||
|
|
||||||
- Meituan LongCat
|
- Meituan LongCat
|
||||||
- Kimi: kimi-k2-turbo-preview and kimi-k2-0905-preview
|
- Kimi: kimi-k2-turbo-preview and kimi-k2-0905-preview
|
||||||
|
|
@ -161,7 +165,7 @@ Released on August 27, 2025.
|
||||||
- Improves Markdown file parsing, with AST support to avoid unintended chunking.
|
- Improves Markdown file parsing, with AST support to avoid unintended chunking.
|
||||||
- Enhances HTML parsing, supporting bs4-based HTML tag traversal.
|
- Enhances HTML parsing, supporting bs4-based HTML tag traversal.
|
||||||
|
|
||||||
### Support new models
|
### Newly supported models
|
||||||
|
|
||||||
ZHIPU GLM-4.5
|
ZHIPU GLM-4.5
|
||||||
|
|
||||||
|
|
@ -222,7 +226,7 @@ Released on August 8, 2025.
|
||||||
- The **Retrieval** component now supports the dynamic specification of dataset names using variables.
|
- The **Retrieval** component now supports the dynamic specification of dataset names using variables.
|
||||||
- The user interface now includes a French language option.
|
- The user interface now includes a French language option.
|
||||||
|
|
||||||
### Support new models
|
### Newly supported models
|
||||||
|
|
||||||
- GPT-5
|
- GPT-5
|
||||||
- Claude 4.1
|
- Claude 4.1
|
||||||
|
|
@ -286,7 +290,7 @@ Released on June 23, 2025.
|
||||||
- Added support for models installed via Ollama or VLLM when creating a dataset through the API. [#8069](https://github.com/infiniflow/ragflow/pull/8069)
|
- Added support for models installed via Ollama or VLLM when creating a dataset through the API. [#8069](https://github.com/infiniflow/ragflow/pull/8069)
|
||||||
- Enabled role-based authentication for S3 bucket access. [#8149](https://github.com/infiniflow/ragflow/pull/8149)
|
- Enabled role-based authentication for S3 bucket access. [#8149](https://github.com/infiniflow/ragflow/pull/8149)
|
||||||
|
|
||||||
### Support new models
|
### Newly supported models
|
||||||
|
|
||||||
- Qwen 3 Embedding. [#8184](https://github.com/infiniflow/ragflow/pull/8184)
|
- Qwen 3 Embedding. [#8184](https://github.com/infiniflow/ragflow/pull/8184)
|
||||||
- Voyage Multimodal 3. [#7987](https://github.com/infiniflow/ragflow/pull/7987)
|
- Voyage Multimodal 3. [#7987](https://github.com/infiniflow/ragflow/pull/7987)
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ ragflow:
|
||||||
infinity:
|
infinity:
|
||||||
image:
|
image:
|
||||||
repository: infiniflow/infinity
|
repository: infiniflow/infinity
|
||||||
tag: v0.6.5
|
tag: v0.6.6
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
pullSecrets: []
|
pullSecrets: []
|
||||||
storage:
|
storage:
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ dependencies = [
|
||||||
"html-text==0.6.2",
|
"html-text==0.6.2",
|
||||||
"httpx[socks]>=0.28.1,<0.29.0",
|
"httpx[socks]>=0.28.1,<0.29.0",
|
||||||
"huggingface-hub>=0.25.0,<0.26.0",
|
"huggingface-hub>=0.25.0,<0.26.0",
|
||||||
"infinity-sdk==0.6.5",
|
"infinity-sdk==0.6.6",
|
||||||
"infinity-emb>=0.0.66,<0.0.67",
|
"infinity-emb>=0.0.66,<0.0.67",
|
||||||
"itsdangerous==2.1.2",
|
"itsdangerous==2.1.2",
|
||||||
"json-repair==0.35.0",
|
"json-repair==0.35.0",
|
||||||
|
|
@ -149,6 +149,7 @@ dependencies = [
|
||||||
"captcha>=0.7.1",
|
"captcha>=0.7.1",
|
||||||
"pip>=25.2",
|
"pip>=25.2",
|
||||||
"pypandoc>=1.16",
|
"pypandoc>=1.16",
|
||||||
|
"pyobvector==0.2.18",
|
||||||
]
|
]
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
|
|
|
||||||
|
|
@ -116,7 +116,7 @@ def by_plaintext(filename, binary=None, from_page=0, to_page=100000, callback=No
|
||||||
else:
|
else:
|
||||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT, llm_name=kwargs.get("layout_recognizer", ""), lang=kwargs.get("lang", "Chinese"))
|
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT, llm_name=kwargs.get("layout_recognizer", ""), lang=kwargs.get("lang", "Chinese"))
|
||||||
pdf_parser = VisionParser(vision_model=vision_model, **kwargs)
|
pdf_parser = VisionParser(vision_model=vision_model, **kwargs)
|
||||||
|
|
||||||
sections, tables = pdf_parser(
|
sections, tables = pdf_parser(
|
||||||
filename if not binary else binary,
|
filename if not binary else binary,
|
||||||
from_page=from_page,
|
from_page=from_page,
|
||||||
|
|
@ -504,7 +504,7 @@ class Markdown(MarkdownParser):
|
||||||
|
|
||||||
return images if images else None
|
return images if images else None
|
||||||
|
|
||||||
def __call__(self, filename, binary=None, separate_tables=True,delimiter=None):
|
def __call__(self, filename, binary=None, separate_tables=True, delimiter=None):
|
||||||
if binary:
|
if binary:
|
||||||
encoding = find_codec(binary)
|
encoding = find_codec(binary)
|
||||||
txt = binary.decode(encoding, errors="ignore")
|
txt = binary.decode(encoding, errors="ignore")
|
||||||
|
|
@ -602,7 +602,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
_SerializedRelationships.load_from_xml = load_from_xml_v2
|
_SerializedRelationships.load_from_xml = load_from_xml_v2
|
||||||
sections, tables = Docx()(filename, binary)
|
sections, tables = Docx()(filename, binary)
|
||||||
|
|
||||||
tables=vision_figure_parser_docx_wrapper(sections=sections,tbls=tables,callback=callback,**kwargs)
|
tables = vision_figure_parser_docx_wrapper(sections=sections, tbls=tables, callback=callback, **kwargs)
|
||||||
|
|
||||||
res = tokenize_table(tables, doc, is_english)
|
res = tokenize_table(tables, doc, is_english)
|
||||||
callback(0.8, "Finish parsing.")
|
callback(0.8, "Finish parsing.")
|
||||||
|
|
@ -653,18 +653,47 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
|
|
||||||
if name in ["tcadp", "docling", "mineru"]:
|
if name in ["tcadp", "docling", "mineru"]:
|
||||||
parser_config["chunk_token_num"] = 0
|
parser_config["chunk_token_num"] = 0
|
||||||
|
|
||||||
res = tokenize_table(tables, doc, is_english)
|
res = tokenize_table(tables, doc, is_english)
|
||||||
callback(0.8, "Finish parsing.")
|
callback(0.8, "Finish parsing.")
|
||||||
|
|
||||||
elif re.search(r"\.(csv|xlsx?)$", filename, re.IGNORECASE):
|
elif re.search(r"\.(csv|xlsx?)$", filename, re.IGNORECASE):
|
||||||
callback(0.1, "Start to parse.")
|
callback(0.1, "Start to parse.")
|
||||||
excel_parser = ExcelParser()
|
|
||||||
if parser_config.get("html4excel"):
|
# Check if tcadp_parser is selected for spreadsheet files
|
||||||
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
layout_recognizer = parser_config.get("layout_recognize", "DeepDOC")
|
||||||
|
if layout_recognizer == "TCADP Parser":
|
||||||
|
table_result_type = parser_config.get("table_result_type", "1")
|
||||||
|
markdown_image_response_type = parser_config.get("markdown_image_response_type", "1")
|
||||||
|
tcadp_parser = TCADPParser(
|
||||||
|
table_result_type=table_result_type,
|
||||||
|
markdown_image_response_type=markdown_image_response_type
|
||||||
|
)
|
||||||
|
if not tcadp_parser.check_installation():
|
||||||
|
callback(-1, "TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||||
|
return res
|
||||||
|
|
||||||
|
# Determine file type based on extension
|
||||||
|
file_type = "XLSX" if re.search(r"\.xlsx?$", filename, re.IGNORECASE) else "CSV"
|
||||||
|
|
||||||
|
sections, tables = tcadp_parser.parse_pdf(
|
||||||
|
filepath=filename,
|
||||||
|
binary=binary,
|
||||||
|
callback=callback,
|
||||||
|
output_dir=os.environ.get("TCADP_OUTPUT_DIR", ""),
|
||||||
|
file_type=file_type
|
||||||
|
)
|
||||||
|
parser_config["chunk_token_num"] = 0
|
||||||
|
res = tokenize_table(tables, doc, is_english)
|
||||||
|
callback(0.8, "Finish parsing.")
|
||||||
else:
|
else:
|
||||||
sections = [(_, "") for _ in excel_parser(binary) if _]
|
# Default DeepDOC parser
|
||||||
parser_config["chunk_token_num"] = 12800
|
excel_parser = ExcelParser()
|
||||||
|
if parser_config.get("html4excel"):
|
||||||
|
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
||||||
|
else:
|
||||||
|
sections = [(_, "") for _ in excel_parser(binary) if _]
|
||||||
|
parser_config["chunk_token_num"] = 12800
|
||||||
|
|
||||||
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
||||||
callback(0.1, "Start to parse.")
|
callback(0.1, "Start to parse.")
|
||||||
|
|
@ -676,7 +705,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
|
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
|
||||||
callback(0.1, "Start to parse.")
|
callback(0.1, "Start to parse.")
|
||||||
markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128)))
|
markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128)))
|
||||||
sections, tables = markdown_parser(filename, binary, separate_tables=False,delimiter=parser_config.get("delimiter", "\n!?;。;!?"))
|
sections, tables = markdown_parser(filename, binary, separate_tables=False, delimiter=parser_config.get("delimiter", "\n!?;。;!?"))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
import trio
|
import trio
|
||||||
|
|
@ -83,6 +84,7 @@ class ParserParam(ProcessParamBase):
|
||||||
"output_format": "json",
|
"output_format": "json",
|
||||||
},
|
},
|
||||||
"spreadsheet": {
|
"spreadsheet": {
|
||||||
|
"parse_method": "deepdoc", # deepdoc/tcadp_parser
|
||||||
"output_format": "html",
|
"output_format": "html",
|
||||||
"suffix": [
|
"suffix": [
|
||||||
"xls",
|
"xls",
|
||||||
|
|
@ -102,8 +104,10 @@ class ParserParam(ProcessParamBase):
|
||||||
"output_format": "json",
|
"output_format": "json",
|
||||||
},
|
},
|
||||||
"slides": {
|
"slides": {
|
||||||
|
"parse_method": "deepdoc", # deepdoc/tcadp_parser
|
||||||
"suffix": [
|
"suffix": [
|
||||||
"pptx",
|
"pptx",
|
||||||
|
"ppt"
|
||||||
],
|
],
|
||||||
"output_format": "json",
|
"output_format": "json",
|
||||||
},
|
},
|
||||||
|
|
@ -245,7 +249,12 @@ class Parser(ProcessBase):
|
||||||
bboxes.append(box)
|
bboxes.append(box)
|
||||||
elif conf.get("parse_method").lower() == "tcadp parser":
|
elif conf.get("parse_method").lower() == "tcadp parser":
|
||||||
# ADP is a document parsing tool using Tencent Cloud API
|
# ADP is a document parsing tool using Tencent Cloud API
|
||||||
tcadp_parser = TCADPParser()
|
table_result_type = conf.get("table_result_type", "1")
|
||||||
|
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||||
|
tcadp_parser = TCADPParser(
|
||||||
|
table_result_type=table_result_type,
|
||||||
|
markdown_image_response_type=markdown_image_response_type
|
||||||
|
)
|
||||||
sections, _ = tcadp_parser.parse_pdf(
|
sections, _ = tcadp_parser.parse_pdf(
|
||||||
filepath=name,
|
filepath=name,
|
||||||
binary=blob,
|
binary=blob,
|
||||||
|
|
@ -301,14 +310,86 @@ class Parser(ProcessBase):
|
||||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Spreadsheet.")
|
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Spreadsheet.")
|
||||||
conf = self._param.setups["spreadsheet"]
|
conf = self._param.setups["spreadsheet"]
|
||||||
self.set_output("output_format", conf["output_format"])
|
self.set_output("output_format", conf["output_format"])
|
||||||
spreadsheet_parser = ExcelParser()
|
|
||||||
if conf.get("output_format") == "html":
|
parse_method = conf.get("parse_method", "deepdoc")
|
||||||
htmls = spreadsheet_parser.html(blob, 1000000000)
|
|
||||||
self.set_output("html", htmls[0])
|
# Handle TCADP parser
|
||||||
elif conf.get("output_format") == "json":
|
if parse_method.lower() == "tcadp parser":
|
||||||
self.set_output("json", [{"text": txt} for txt in spreadsheet_parser(blob) if txt])
|
table_result_type = conf.get("table_result_type", "1")
|
||||||
elif conf.get("output_format") == "markdown":
|
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||||
self.set_output("markdown", spreadsheet_parser.markdown(blob))
|
tcadp_parser = TCADPParser(
|
||||||
|
table_result_type=table_result_type,
|
||||||
|
markdown_image_response_type=markdown_image_response_type
|
||||||
|
)
|
||||||
|
if not tcadp_parser.check_installation():
|
||||||
|
raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||||
|
|
||||||
|
# Determine file type based on extension
|
||||||
|
if re.search(r"\.xlsx?$", name, re.IGNORECASE):
|
||||||
|
file_type = "XLSX"
|
||||||
|
else:
|
||||||
|
file_type = "CSV"
|
||||||
|
|
||||||
|
self.callback(0.2, f"Using TCADP parser for {file_type} file.")
|
||||||
|
sections, tables = tcadp_parser.parse_pdf(
|
||||||
|
filepath=name,
|
||||||
|
binary=blob,
|
||||||
|
callback=self.callback,
|
||||||
|
file_type=file_type,
|
||||||
|
file_start_page=1,
|
||||||
|
file_end_page=1000
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process TCADP parser output based on configured output_format
|
||||||
|
output_format = conf.get("output_format", "html")
|
||||||
|
|
||||||
|
if output_format == "html":
|
||||||
|
# For HTML output, combine sections and tables into HTML
|
||||||
|
html_content = ""
|
||||||
|
for section, position_tag in sections:
|
||||||
|
if section:
|
||||||
|
html_content += section + "\n"
|
||||||
|
for table in tables:
|
||||||
|
if table:
|
||||||
|
html_content += table + "\n"
|
||||||
|
|
||||||
|
self.set_output("html", html_content)
|
||||||
|
|
||||||
|
elif output_format == "json":
|
||||||
|
# For JSON output, create a list of text items
|
||||||
|
result = []
|
||||||
|
# Add sections as text
|
||||||
|
for section, position_tag in sections:
|
||||||
|
if section:
|
||||||
|
result.append({"text": section})
|
||||||
|
# Add tables as text
|
||||||
|
for table in tables:
|
||||||
|
if table:
|
||||||
|
result.append({"text": table})
|
||||||
|
|
||||||
|
self.set_output("json", result)
|
||||||
|
|
||||||
|
elif output_format == "markdown":
|
||||||
|
# For markdown output, combine into markdown
|
||||||
|
md_content = ""
|
||||||
|
for section, position_tag in sections:
|
||||||
|
if section:
|
||||||
|
md_content += section + "\n\n"
|
||||||
|
for table in tables:
|
||||||
|
if table:
|
||||||
|
md_content += table + "\n\n"
|
||||||
|
|
||||||
|
self.set_output("markdown", md_content)
|
||||||
|
else:
|
||||||
|
# Default DeepDOC parser
|
||||||
|
spreadsheet_parser = ExcelParser()
|
||||||
|
if conf.get("output_format") == "html":
|
||||||
|
htmls = spreadsheet_parser.html(blob, 1000000000)
|
||||||
|
self.set_output("html", htmls[0])
|
||||||
|
elif conf.get("output_format") == "json":
|
||||||
|
self.set_output("json", [{"text": txt} for txt in spreadsheet_parser(blob) if txt])
|
||||||
|
elif conf.get("output_format") == "markdown":
|
||||||
|
self.set_output("markdown", spreadsheet_parser.markdown(blob))
|
||||||
|
|
||||||
def _word(self, name, blob):
|
def _word(self, name, blob):
|
||||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Word Processor Document")
|
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Word Processor Document")
|
||||||
|
|
@ -326,22 +407,69 @@ class Parser(ProcessBase):
|
||||||
self.set_output("markdown", markdown_text)
|
self.set_output("markdown", markdown_text)
|
||||||
|
|
||||||
def _slides(self, name, blob):
|
def _slides(self, name, blob):
|
||||||
from deepdoc.parser.ppt_parser import RAGFlowPptParser as ppt_parser
|
|
||||||
|
|
||||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a PowerPoint Document")
|
self.callback(random.randint(1, 5) / 100.0, "Start to work on a PowerPoint Document")
|
||||||
|
|
||||||
conf = self._param.setups["slides"]
|
conf = self._param.setups["slides"]
|
||||||
self.set_output("output_format", conf["output_format"])
|
self.set_output("output_format", conf["output_format"])
|
||||||
|
|
||||||
ppt_parser = ppt_parser()
|
parse_method = conf.get("parse_method", "deepdoc")
|
||||||
txts = ppt_parser(blob, 0, 100000, None)
|
|
||||||
|
|
||||||
sections = [{"text": section} for section in txts if section.strip()]
|
# Handle TCADP parser
|
||||||
|
if parse_method.lower() == "tcadp parser":
|
||||||
|
table_result_type = conf.get("table_result_type", "1")
|
||||||
|
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||||
|
tcadp_parser = TCADPParser(
|
||||||
|
table_result_type=table_result_type,
|
||||||
|
markdown_image_response_type=markdown_image_response_type
|
||||||
|
)
|
||||||
|
if not tcadp_parser.check_installation():
|
||||||
|
raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||||
|
|
||||||
# json
|
# Determine file type based on extension
|
||||||
assert conf.get("output_format") == "json", "have to be json for ppt"
|
if re.search(r"\.pptx?$", name, re.IGNORECASE):
|
||||||
if conf.get("output_format") == "json":
|
file_type = "PPTX"
|
||||||
self.set_output("json", sections)
|
else:
|
||||||
|
file_type = "PPT"
|
||||||
|
|
||||||
|
self.callback(0.2, f"Using TCADP parser for {file_type} file.")
|
||||||
|
|
||||||
|
sections, tables = tcadp_parser.parse_pdf(
|
||||||
|
filepath=name,
|
||||||
|
binary=blob,
|
||||||
|
callback=self.callback,
|
||||||
|
file_type=file_type,
|
||||||
|
file_start_page=1,
|
||||||
|
file_end_page=1000
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process TCADP parser output - PPT only supports json format
|
||||||
|
output_format = conf.get("output_format", "json")
|
||||||
|
if output_format == "json":
|
||||||
|
# For JSON output, create a list of text items
|
||||||
|
result = []
|
||||||
|
# Add sections as text
|
||||||
|
for section, position_tag in sections:
|
||||||
|
if section:
|
||||||
|
result.append({"text": section})
|
||||||
|
# Add tables as text
|
||||||
|
for table in tables:
|
||||||
|
if table:
|
||||||
|
result.append({"text": table})
|
||||||
|
|
||||||
|
self.set_output("json", result)
|
||||||
|
else:
|
||||||
|
# Default DeepDOC parser (supports .pptx format)
|
||||||
|
from deepdoc.parser.ppt_parser import RAGFlowPptParser as ppt_parser
|
||||||
|
|
||||||
|
ppt_parser = ppt_parser()
|
||||||
|
txts = ppt_parser(blob, 0, 100000, None)
|
||||||
|
|
||||||
|
sections = [{"text": section} for section in txts if section.strip()]
|
||||||
|
|
||||||
|
# json
|
||||||
|
assert conf.get("output_format") == "json", "have to be json for ppt"
|
||||||
|
if conf.get("output_format") == "json":
|
||||||
|
self.set_output("json", sections)
|
||||||
|
|
||||||
def _markdown(self, name, blob):
|
def _markdown(self, name, blob):
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
|
@ -579,6 +707,7 @@ class Parser(ProcessBase):
|
||||||
"video": self._video,
|
"video": self._video,
|
||||||
"email": self._email,
|
"email": self._email,
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -234,7 +234,11 @@ class CoHereRerank(Base):
|
||||||
def __init__(self, key, model_name, base_url=None):
|
def __init__(self, key, model_name, base_url=None):
|
||||||
from cohere import Client
|
from cohere import Client
|
||||||
|
|
||||||
self.client = Client(api_key=key, base_url=base_url)
|
# Only pass base_url if it's a non-empty string, otherwise use default Cohere API endpoint
|
||||||
|
client_kwargs = {"api_key": key}
|
||||||
|
if base_url and base_url.strip():
|
||||||
|
client_kwargs["base_url"] = base_url
|
||||||
|
self.client = Client(**client_kwargs)
|
||||||
self.model_name = model_name.split("___")[0]
|
self.model_name = model_name.split("___")[0]
|
||||||
|
|
||||||
def similarity(self, query: str, texts: list):
|
def similarity(self, query: str, texts: list):
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,7 @@ class FulltextQueryer:
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
def question(self, txt, tbl="qa", min_match: float = 0.6):
|
def question(self, txt, tbl="qa", min_match: float = 0.6):
|
||||||
|
original_query = txt
|
||||||
txt = FulltextQueryer.add_space_between_eng_zh(txt)
|
txt = FulltextQueryer.add_space_between_eng_zh(txt)
|
||||||
txt = re.sub(
|
txt = re.sub(
|
||||||
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+",
|
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+",
|
||||||
|
|
@ -127,7 +128,7 @@ class FulltextQueryer:
|
||||||
q.append(txt)
|
q.append(txt)
|
||||||
query = " ".join(q)
|
query = " ".join(q)
|
||||||
return MatchTextExpr(
|
return MatchTextExpr(
|
||||||
self.query_fields, query, 100
|
self.query_fields, query, 100, {"original_query": original_query}
|
||||||
), keywords
|
), keywords
|
||||||
|
|
||||||
def need_fine_grained_tokenize(tk):
|
def need_fine_grained_tokenize(tk):
|
||||||
|
|
@ -212,7 +213,7 @@ class FulltextQueryer:
|
||||||
if not query:
|
if not query:
|
||||||
query = otxt
|
query = otxt
|
||||||
return MatchTextExpr(
|
return MatchTextExpr(
|
||||||
self.query_fields, query, 100, {"minimum_should_match": min_match}
|
self.query_fields, query, 100, {"minimum_should_match": min_match, "original_query": original_query}
|
||||||
), keywords
|
), keywords
|
||||||
return None, keywords
|
return None, keywords
|
||||||
|
|
||||||
|
|
@ -259,6 +260,7 @@ class FulltextQueryer:
|
||||||
content_tks = [c.strip() for c in content_tks.strip() if c.strip()]
|
content_tks = [c.strip() for c in content_tks.strip() if c.strip()]
|
||||||
tks_w = self.tw.weights(content_tks, preprocess=False)
|
tks_w = self.tw.weights(content_tks, preprocess=False)
|
||||||
|
|
||||||
|
origin_keywords = keywords.copy()
|
||||||
keywords = [f'"{k.strip()}"' for k in keywords]
|
keywords = [f'"{k.strip()}"' for k in keywords]
|
||||||
for tk, w in sorted(tks_w, key=lambda x: x[1] * -1)[:keywords_topn]:
|
for tk, w in sorted(tks_w, key=lambda x: x[1] * -1)[:keywords_topn]:
|
||||||
tk_syns = self.syn.lookup(tk)
|
tk_syns = self.syn.lookup(tk)
|
||||||
|
|
@ -274,4 +276,4 @@ class FulltextQueryer:
|
||||||
keywords.append(f"{tk}^{w}")
|
keywords.append(f"{tk}^{w}")
|
||||||
|
|
||||||
return MatchTextExpr(self.query_fields, " ".join(keywords), 100,
|
return MatchTextExpr(self.query_fields, " ".join(keywords), 100,
|
||||||
{"minimum_should_match": min(3, len(keywords) // 10)})
|
{"minimum_should_match": min(3, len(keywords) / 10), "original_query": " ".join(origin_keywords)})
|
||||||
|
|
|
||||||
1562
rag/utils/ob_conn.py
Normal file
1562
rag/utils/ob_conn.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -140,6 +140,16 @@
|
||||||
<path d="M0 0h1024v1024H0z" opacity=".01"></path>
|
<path d="M0 0h1024v1024H0z" opacity=".01"></path>
|
||||||
<path d="M867.072 141.184H156.032a32 32 0 0 0 0 64h711.04a32 32 0 0 0 0-64z m0.832 226.368H403.2a32 32 0 0 0 0 64h464.704a32 32 0 0 0 0-64zM403.2 573.888h464.704a32 32 0 0 1 0 64H403.2a32 32 0 0 1 0-64z m464.704 226.368H156.864a32 32 0 0 0 0 64h711.04a32 32 0 0 0 0-64zM137.472 367.552v270.336l174.528-122.24-174.528-148.096z" ></path>
|
<path d="M867.072 141.184H156.032a32 32 0 0 0 0 64h711.04a32 32 0 0 0 0-64z m0.832 226.368H403.2a32 32 0 0 0 0 64h464.704a32 32 0 0 0 0-64zM403.2 573.888h464.704a32 32 0 0 1 0 64H403.2a32 32 0 0 1 0-64z m464.704 226.368H156.864a32 32 0 0 0 0 64h711.04a32 32 0 0 0 0-64zM137.472 367.552v270.336l174.528-122.24-174.528-148.096z" ></path>
|
||||||
</symbol>` +
|
</symbol>` +
|
||||||
|
` <symbol id="icon-a-listoperations" viewBox="0 0 1024 1024">
|
||||||
|
<path d="M341.376 96a32 32 0 0 1 0 64h-128a10.688 10.688 0 0 0-10.688 10.688v682.624a10.752 10.752 0 0 0 10.688 10.688h128a32 32 0 0 1 0 64h-128a74.688 74.688 0 0 1-74.688-74.688V170.688A74.688 74.688 0 0 1 213.376 96h128z m469.312 0a74.688 74.688 0 0 1 74.688 74.688v682.624a74.752 74.752 0 0 1-74.688 74.688h-128a32 32 0 1 1 0-64h128a10.752 10.752 0 0 0 10.688-10.688V170.688a10.752 10.752 0 0 0-10.688-10.688h-128a32 32 0 1 1 0-64h128zM357.248 464.256a48 48 0 0 1 0 95.488l-4.928 0.256H352a48 48 0 0 1 0-96h0.32l4.928 0.256z m155.072-0.256a48 48 0 1 1 0 96H512a48 48 0 0 1 0-96h0.32z m160 0a48 48 0 0 1 0 96H672a48 48 0 0 1 0-96h0.32z" ></path>
|
||||||
|
</symbol>` +
|
||||||
|
`<symbol id="icon-aggregator" viewBox="0 0 1024 1024">
|
||||||
|
<path d="M949.312 533.312a32 32 0 0 1-9.344 22.592l-170.688 170.688a32 32 0 0 1-45.248-45.248l116.032-116.032H478.208l-10.176-0.128a202.688 202.688 0 0 1-135.36-59.264L41.344 214.592a32 32 0 1 1 45.312-45.248l291.264 291.328 10.24 9.344a138.688 138.688 0 0 0 89.344 31.296h362.56L724.032 385.28a32 32 0 0 1 45.248-45.248l170.688 170.624a32 32 0 0 1 9.344 22.656zM299.968 638.656a32 32 0 0 1 0 45.248L86.656 897.28a32 32 0 0 1-45.312-45.248L254.72 638.72a32 32 0 0 1 45.312 0z" ></path>
|
||||||
|
</symbol>` +
|
||||||
|
`<symbol id="icon-a-ariableassigner" viewBox="0 0 1024 1024">
|
||||||
|
<path d="M509.056 64c123.136 0 235.072 48.512 317.12 130.56l-41.024 37.312C714.24 161.024 617.216 119.936 509.056 119.936a391.808 391.808 0 1 0 0 783.552 392.448 392.448 0 0 0 294.784-134.272l41.024 37.312c-82.048 93.248-201.472 149.248-335.808 149.248-246.272 3.712-447.744-197.76-447.744-444.032S262.784 64 509.056 64z m-63.424 186.56a29.184 29.184 0 0 1 14.912 14.912l160.448 444.032c3.712 14.912-3.712 26.112-18.56 33.536-14.976 3.776-26.24-3.648-33.664-14.848l-48.512-149.248H341.12l-59.712 149.248a27.648 27.648 0 0 1-33.6 14.848c-14.912-3.712-18.56-18.624-14.848-33.536l179.008-444.032c3.776-11.136 22.4-18.624 33.6-14.912zM889.6 530.432c14.976 0 26.176 11.2 26.176 26.112a25.472 25.472 0 0 1-26.176 26.112h-212.608a25.472 25.472 0 0 1-26.112-26.112c0-14.912 11.2-26.112 26.112-26.112H889.6z m-529.792 0h141.824L434.432 351.36l-74.624 179.2zM889.6 411.008c14.912 0 26.176 11.2 26.176 26.112a25.536 25.536 0 0 1-26.176 26.112h-212.608a25.536 25.536 0 0 1-26.112-26.112c0-14.912 11.2-26.112 26.112-26.112H889.6z" ></path>
|
||||||
|
</symbol>
|
||||||
|
` +
|
||||||
'</svg>'),
|
'</svg>'),
|
||||||
((h) => {
|
((h) => {
|
||||||
var a = (l = (l = document.getElementsByTagName('script'))[
|
var a = (l = (l = document.getElementsByTagName('script'))[
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ const JsonSchemaVisualizer: FC<JsonSchemaVisualizerProps> = ({
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const parsedJson = JSON.parse(value);
|
const parsedJson = JSON.parse(value);
|
||||||
if (onChange) {
|
if (onChange && typeof parsedJson !== 'number') {
|
||||||
onChange(parsedJson);
|
onChange(parsedJson);
|
||||||
}
|
}
|
||||||
} catch (_error) {
|
} catch (_error) {
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ export default {
|
||||||
portugueseBr: 'Portuguese (Brazil)',
|
portugueseBr: 'Portuguese (Brazil)',
|
||||||
chinese: 'Simplified Chinese',
|
chinese: 'Simplified Chinese',
|
||||||
traditionalChinese: 'Traditional Chinese',
|
traditionalChinese: 'Traditional Chinese',
|
||||||
|
russian: 'Russian',
|
||||||
language: 'Language',
|
language: 'Language',
|
||||||
languageMessage: 'Please input your language!',
|
languageMessage: 'Please input your language!',
|
||||||
languagePlaceholder: 'select your language',
|
languagePlaceholder: 'select your language',
|
||||||
|
|
@ -1645,6 +1646,7 @@ The variable aggregation node (originally the variable assignment node) is a cru
|
||||||
beginInputTip:
|
beginInputTip:
|
||||||
'By defining input parameters, this content can be accessed by other components in subsequent processes.',
|
'By defining input parameters, this content can be accessed by other components in subsequent processes.',
|
||||||
query: 'Query variables',
|
query: 'Query variables',
|
||||||
|
queryRequired: 'Query is required',
|
||||||
queryTip: 'Select the variable you want to use',
|
queryTip: 'Select the variable you want to use',
|
||||||
agent: 'Agent',
|
agent: 'Agent',
|
||||||
addAgent: 'Add Agent',
|
addAgent: 'Add Agent',
|
||||||
|
|
@ -1751,6 +1753,8 @@ The variable aggregation node (originally the variable assignment node) is a cru
|
||||||
The Indexer will store the content in the corresponding data structures for the selected methods.`,
|
The Indexer will store the content in the corresponding data structures for the selected methods.`,
|
||||||
// file: 'File',
|
// file: 'File',
|
||||||
parserMethod: 'PDF parser',
|
parserMethod: 'PDF parser',
|
||||||
|
tableResultType: 'Table Result Type',
|
||||||
|
markdownImageResponseType: 'Markdown Image Response Type',
|
||||||
// systemPrompt: 'System Prompt',
|
// systemPrompt: 'System Prompt',
|
||||||
systemPromptPlaceholder:
|
systemPromptPlaceholder:
|
||||||
'Enter system prompt for image analysis, if empty the system default value will be used',
|
'Enter system prompt for image analysis, if empty the system default value will be used',
|
||||||
|
|
@ -1854,7 +1858,7 @@ Important structured information may include: names, dates, locations, events, k
|
||||||
desc: 'Descending',
|
desc: 'Descending',
|
||||||
},
|
},
|
||||||
variableAssignerLogicalOperatorOptions: {
|
variableAssignerLogicalOperatorOptions: {
|
||||||
overwrite: 'Overwrite',
|
overwrite: 'Overwritten By',
|
||||||
clear: 'Clear',
|
clear: 'Clear',
|
||||||
set: 'Set',
|
set: 'Set',
|
||||||
'+=': 'Add',
|
'+=': 'Add',
|
||||||
|
|
@ -1933,6 +1937,7 @@ Important structured information may include: names, dates, locations, events, k
|
||||||
japanese: 'Japanese',
|
japanese: 'Japanese',
|
||||||
korean: 'Korean',
|
korean: 'Korean',
|
||||||
vietnamese: 'Vietnamese',
|
vietnamese: 'Vietnamese',
|
||||||
|
russian: 'Russian',
|
||||||
},
|
},
|
||||||
pagination: {
|
pagination: {
|
||||||
total: 'Total {{total}}',
|
total: 'Total {{total}}',
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1549,6 +1549,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||||
task: '任务',
|
task: '任务',
|
||||||
beginInputTip: '通过定义输入参数,此内容可以被后续流程中的其他组件访问。',
|
beginInputTip: '通过定义输入参数,此内容可以被后续流程中的其他组件访问。',
|
||||||
query: '查询变量',
|
query: '查询变量',
|
||||||
|
queryRequired: '查询变量是必填项',
|
||||||
queryTip: '选择您想要使用的变量',
|
queryTip: '选择您想要使用的变量',
|
||||||
agent: '智能体',
|
agent: '智能体',
|
||||||
addAgent: '添加智能体',
|
addAgent: '添加智能体',
|
||||||
|
|
@ -1628,6 +1629,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||||
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
||||||
filenameEmbdWeight: '文件名嵌入权重',
|
filenameEmbdWeight: '文件名嵌入权重',
|
||||||
parserMethod: '解析方法',
|
parserMethod: '解析方法',
|
||||||
|
tableResultType: '表格返回形式',
|
||||||
|
markdownImageResponseType: '图片返回形式',
|
||||||
systemPromptPlaceholder:
|
systemPromptPlaceholder:
|
||||||
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
|
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
|
||||||
exportJson: '导出 JSON',
|
exportJson: '导出 JSON',
|
||||||
|
|
|
||||||
|
|
@ -11,11 +11,12 @@ export function DataOperationsNode({
|
||||||
}: NodeProps<BaseNode<DataOperationsFormSchemaType>>) {
|
}: NodeProps<BaseNode<DataOperationsFormSchemaType>>) {
|
||||||
const { data } = props;
|
const { data } = props;
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
|
const operations = data.form?.operations;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<RagNode {...props}>
|
<RagNode {...props}>
|
||||||
<LabelCard>
|
<LabelCard>
|
||||||
{t(`flow.operationsOptions.${camelCase(data.form?.operations)}`)}
|
{operations && t(`flow.operationsOptions.${camelCase(operations)}`)}
|
||||||
</LabelCard>
|
</LabelCard>
|
||||||
</RagNode>
|
</RagNode>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,7 @@ export const initialParserValues = {
|
||||||
{
|
{
|
||||||
fileFormat: FileType.Spreadsheet,
|
fileFormat: FileType.Spreadsheet,
|
||||||
output_format: SpreadsheetOutputFormat.Html,
|
output_format: SpreadsheetOutputFormat.Html,
|
||||||
|
parse_method: ParseDocumentType.DeepDOC,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
fileFormat: FileType.Image,
|
fileFormat: FileType.Image,
|
||||||
|
|
@ -192,6 +193,7 @@ export const initialParserValues = {
|
||||||
{
|
{
|
||||||
fileFormat: FileType.PowerPoint,
|
fileFormat: FileType.PowerPoint,
|
||||||
output_format: PptOutputFormat.Json,
|
output_format: PptOutputFormat.Json,
|
||||||
|
parse_method: ParseDocumentType.DeepDOC,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
@ -243,7 +245,7 @@ export const FileTypeSuffixMap = {
|
||||||
[FileType.Email]: ['eml', 'msg'],
|
[FileType.Email]: ['eml', 'msg'],
|
||||||
[FileType.TextMarkdown]: ['md', 'markdown', 'mdx', 'txt'],
|
[FileType.TextMarkdown]: ['md', 'markdown', 'mdx', 'txt'],
|
||||||
[FileType.Docx]: ['doc', 'docx'],
|
[FileType.Docx]: ['doc', 'docx'],
|
||||||
[FileType.PowerPoint]: ['pptx'],
|
[FileType.PowerPoint]: ['pptx', 'ppt'],
|
||||||
[FileType.Video]: ['mp4', 'avi', 'mkv'],
|
[FileType.Video]: ['mp4', 'avi', 'mkv'],
|
||||||
[FileType.Audio]: [
|
[FileType.Audio]: [
|
||||||
'da',
|
'da',
|
||||||
|
|
|
||||||
|
|
@ -22,12 +22,13 @@ import { Switch } from '@/components/ui/switch';
|
||||||
import { LlmModelType } from '@/constants/knowledge';
|
import { LlmModelType } from '@/constants/knowledge';
|
||||||
import { useFindLlmByUuid } from '@/hooks/use-llm-request';
|
import { useFindLlmByUuid } from '@/hooks/use-llm-request';
|
||||||
import { zodResolver } from '@hookform/resolvers/zod';
|
import { zodResolver } from '@hookform/resolvers/zod';
|
||||||
import { memo, useEffect, useMemo } from 'react';
|
import { memo, useCallback, useEffect, useMemo } from 'react';
|
||||||
import { useForm, useWatch } from 'react-hook-form';
|
import { useForm, useWatch } from 'react-hook-form';
|
||||||
import { useTranslation } from 'react-i18next';
|
import { useTranslation } from 'react-i18next';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import {
|
import {
|
||||||
AgentExceptionMethod,
|
AgentExceptionMethod,
|
||||||
|
AgentStructuredOutputField,
|
||||||
NodeHandleId,
|
NodeHandleId,
|
||||||
VariableType,
|
VariableType,
|
||||||
} from '../../constant';
|
} from '../../constant';
|
||||||
|
|
@ -127,6 +128,17 @@ function AgentForm({ node }: INextOperatorForm) {
|
||||||
handleStructuredOutputDialogOk,
|
handleStructuredOutputDialogOk,
|
||||||
} = useShowStructuredOutputDialog(node?.id);
|
} = useShowStructuredOutputDialog(node?.id);
|
||||||
|
|
||||||
|
const updateNodeForm = useGraphStore((state) => state.updateNodeForm);
|
||||||
|
|
||||||
|
const handleShowStructuredOutput = useCallback(
|
||||||
|
(val: boolean) => {
|
||||||
|
if (node?.id && val) {
|
||||||
|
updateNodeForm(node?.id, {}, ['outputs', AgentStructuredOutputField]);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
[node?.id, updateNodeForm],
|
||||||
|
);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (exceptionMethod !== AgentExceptionMethod.Goto) {
|
if (exceptionMethod !== AgentExceptionMethod.Goto) {
|
||||||
if (node?.id) {
|
if (node?.id) {
|
||||||
|
|
@ -293,7 +305,10 @@ function AgentForm({ node }: INextOperatorForm) {
|
||||||
<Switch
|
<Switch
|
||||||
id="airplane-mode"
|
id="airplane-mode"
|
||||||
checked={field.value}
|
checked={field.value}
|
||||||
onCheckedChange={field.onChange}
|
onCheckedChange={(val) => {
|
||||||
|
handleShowStructuredOutput(val);
|
||||||
|
field.onChange(val);
|
||||||
|
}}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
|
||||||
|
|
@ -17,20 +17,13 @@ export function useWatchFormChange(id?: string, form?: UseFormReturn<any>) {
|
||||||
prompts: [{ role: PromptRole.User, content: values.prompts }],
|
prompts: [{ role: PromptRole.User, content: values.prompts }],
|
||||||
};
|
};
|
||||||
|
|
||||||
if (values.showStructuredOutput) {
|
if (!values.showStructuredOutput) {
|
||||||
nextValues = {
|
|
||||||
...nextValues,
|
|
||||||
outputs: {
|
|
||||||
...values.outputs,
|
|
||||||
[AgentStructuredOutputField]:
|
|
||||||
values[AgentStructuredOutputField] ?? {},
|
|
||||||
},
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
nextValues = {
|
nextValues = {
|
||||||
...nextValues,
|
...nextValues,
|
||||||
outputs: omit(values.outputs, [AgentStructuredOutputField]),
|
outputs: omit(values.outputs, [AgentStructuredOutputField]),
|
||||||
};
|
};
|
||||||
|
} else {
|
||||||
|
nextValues = omit(nextValues, 'outputs');
|
||||||
}
|
}
|
||||||
updateNodeForm(id, nextValues);
|
updateNodeForm(id, nextValues);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,17 +7,24 @@ export type FormListHeaderProps = {
|
||||||
label: ReactNode;
|
label: ReactNode;
|
||||||
tooltip?: string;
|
tooltip?: string;
|
||||||
onClick?: () => void;
|
onClick?: () => void;
|
||||||
|
disabled?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
export function DynamicFormHeader({
|
export function DynamicFormHeader({
|
||||||
label,
|
label,
|
||||||
tooltip,
|
tooltip,
|
||||||
onClick,
|
onClick,
|
||||||
|
disabled = false,
|
||||||
}: FormListHeaderProps) {
|
}: FormListHeaderProps) {
|
||||||
return (
|
return (
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<FormLabel tooltip={tooltip}>{label}</FormLabel>
|
<FormLabel tooltip={tooltip}>{label}</FormLabel>
|
||||||
<Button variant={'ghost'} type="button" onClick={onClick}>
|
<Button
|
||||||
|
variant={'ghost'}
|
||||||
|
type="button"
|
||||||
|
onClick={onClick}
|
||||||
|
disabled={disabled}
|
||||||
|
>
|
||||||
<Plus />
|
<Plus />
|
||||||
</Button>
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,10 @@ import { Button } from '@/components/ui/button';
|
||||||
import { X } from 'lucide-react';
|
import { X } from 'lucide-react';
|
||||||
import { useFieldArray, useFormContext } from 'react-hook-form';
|
import { useFieldArray, useFormContext } from 'react-hook-form';
|
||||||
import { JsonSchemaDataType } from '../../constant';
|
import { JsonSchemaDataType } from '../../constant';
|
||||||
|
import {
|
||||||
|
flatOptions,
|
||||||
|
useFilterQueryVariableOptionsByTypes,
|
||||||
|
} from '../../hooks/use-get-begin-query';
|
||||||
import { DynamicFormHeader, FormListHeaderProps } from './dynamic-fom-header';
|
import { DynamicFormHeader, FormListHeaderProps } from './dynamic-fom-header';
|
||||||
import { QueryVariable } from './query-variable';
|
import { QueryVariable } from './query-variable';
|
||||||
|
|
||||||
|
|
@ -16,6 +20,10 @@ export function QueryVariableList({
|
||||||
const form = useFormContext();
|
const form = useFormContext();
|
||||||
const name = 'query';
|
const name = 'query';
|
||||||
|
|
||||||
|
let options = useFilterQueryVariableOptionsByTypes(types);
|
||||||
|
|
||||||
|
const secondOptions = flatOptions(options);
|
||||||
|
|
||||||
const { fields, remove, append } = useFieldArray({
|
const { fields, remove, append } = useFieldArray({
|
||||||
name: name,
|
name: name,
|
||||||
control: form.control,
|
control: form.control,
|
||||||
|
|
@ -26,14 +34,15 @@ export function QueryVariableList({
|
||||||
<DynamicFormHeader
|
<DynamicFormHeader
|
||||||
label={label}
|
label={label}
|
||||||
tooltip={tooltip}
|
tooltip={tooltip}
|
||||||
onClick={() => append({ input: '' })}
|
onClick={() => append({ input: secondOptions.at(0)?.value })}
|
||||||
|
disabled={!secondOptions.length}
|
||||||
></DynamicFormHeader>
|
></DynamicFormHeader>
|
||||||
<div className="space-y-5">
|
<div className="space-y-5">
|
||||||
{fields.map((field, index) => {
|
{fields.map((field, index) => {
|
||||||
const nameField = `${name}.${index}.input`;
|
const nameField = `${name}.${index}.input`;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div key={field.id} className="flex items-center gap-2">
|
<div key={field.id} className="flex gap-2">
|
||||||
<QueryVariable
|
<QueryVariable
|
||||||
name={nameField}
|
name={nameField}
|
||||||
hideLabel
|
hideLabel
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import { Form } from '@/components/ui/form';
|
||||||
import { Separator } from '@/components/ui/separator';
|
import { Separator } from '@/components/ui/separator';
|
||||||
import { buildOptions } from '@/utils/form';
|
import { buildOptions } from '@/utils/form';
|
||||||
import { zodResolver } from '@hookform/resolvers/zod';
|
import { zodResolver } from '@hookform/resolvers/zod';
|
||||||
|
import { t } from 'i18next';
|
||||||
import { memo } from 'react';
|
import { memo } from 'react';
|
||||||
import { useForm, useWatch } from 'react-hook-form';
|
import { useForm, useWatch } from 'react-hook-form';
|
||||||
import { useTranslation } from 'react-i18next';
|
import { useTranslation } from 'react-i18next';
|
||||||
|
|
@ -25,7 +26,11 @@ import { SelectKeys } from './select-keys';
|
||||||
import { Updates } from './updates';
|
import { Updates } from './updates';
|
||||||
|
|
||||||
export const RetrievalPartialSchema = {
|
export const RetrievalPartialSchema = {
|
||||||
query: z.array(z.object({ input: z.string().optional() })),
|
query: z.array(
|
||||||
|
z.object({
|
||||||
|
input: z.string().min(1, { message: t('flow.queryRequired') }),
|
||||||
|
}),
|
||||||
|
),
|
||||||
operations: z.string(),
|
operations: z.string(),
|
||||||
select_keys: z.array(z.object({ name: z.string().optional() })).optional(),
|
select_keys: z.array(z.object({ name: z.string().optional() })).optional(),
|
||||||
remove_keys: z.array(z.object({ name: z.string().optional() })).optional(),
|
remove_keys: z.array(z.object({ name: z.string().optional() })).optional(),
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,8 @@ import { OutputFormatFormField } from './common-form-fields';
|
||||||
import { EmailFormFields } from './email-form-fields';
|
import { EmailFormFields } from './email-form-fields';
|
||||||
import { ImageFormFields } from './image-form-fields';
|
import { ImageFormFields } from './image-form-fields';
|
||||||
import { PdfFormFields } from './pdf-form-fields';
|
import { PdfFormFields } from './pdf-form-fields';
|
||||||
|
import { PptFormFields } from './ppt-form-fields';
|
||||||
|
import { SpreadsheetFormFields } from './spreadsheet-form-fields';
|
||||||
import { buildFieldNameWithPrefix } from './utils';
|
import { buildFieldNameWithPrefix } from './utils';
|
||||||
import { AudioFormFields, VideoFormFields } from './video-form-fields';
|
import { AudioFormFields, VideoFormFields } from './video-form-fields';
|
||||||
|
|
||||||
|
|
@ -41,6 +43,8 @@ const outputList = buildOutputList(initialParserValues.outputs);
|
||||||
|
|
||||||
const FileFormatWidgetMap = {
|
const FileFormatWidgetMap = {
|
||||||
[FileType.PDF]: PdfFormFields,
|
[FileType.PDF]: PdfFormFields,
|
||||||
|
[FileType.Spreadsheet]: SpreadsheetFormFields,
|
||||||
|
[FileType.PowerPoint]: PptFormFields,
|
||||||
[FileType.Video]: VideoFormFields,
|
[FileType.Video]: VideoFormFields,
|
||||||
[FileType.Audio]: AudioFormFields,
|
[FileType.Audio]: AudioFormFields,
|
||||||
[FileType.Email]: EmailFormFields,
|
[FileType.Email]: EmailFormFields,
|
||||||
|
|
@ -65,6 +69,8 @@ export const FormSchema = z.object({
|
||||||
fields: z.array(z.string()).optional(),
|
fields: z.array(z.string()).optional(),
|
||||||
llm_id: z.string().optional(),
|
llm_id: z.string().optional(),
|
||||||
system_prompt: z.string().optional(),
|
system_prompt: z.string().optional(),
|
||||||
|
table_result_type: z.string().optional(),
|
||||||
|
markdown_image_response_type: z.string().optional(),
|
||||||
}),
|
}),
|
||||||
),
|
),
|
||||||
});
|
});
|
||||||
|
|
@ -184,6 +190,8 @@ const ParserForm = ({ node }: INextOperatorForm) => {
|
||||||
lang: '',
|
lang: '',
|
||||||
fields: [],
|
fields: [],
|
||||||
llm_id: '',
|
llm_id: '',
|
||||||
|
table_result_type: '',
|
||||||
|
markdown_image_response_type: '',
|
||||||
});
|
});
|
||||||
}, [append]);
|
}, [append]);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,30 @@
|
||||||
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
|
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
|
||||||
|
import {
|
||||||
|
SelectWithSearch,
|
||||||
|
SelectWithSearchFlagOptionType,
|
||||||
|
} from '@/components/originui/select-with-search';
|
||||||
|
import { RAGFlowFormItem } from '@/components/ragflow-form';
|
||||||
import { isEmpty } from 'lodash';
|
import { isEmpty } from 'lodash';
|
||||||
import { useEffect, useMemo } from 'react';
|
import { useEffect, useMemo } from 'react';
|
||||||
import { useFormContext, useWatch } from 'react-hook-form';
|
import { useFormContext, useWatch } from 'react-hook-form';
|
||||||
|
import { useTranslation } from 'react-i18next';
|
||||||
import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
|
import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
|
||||||
import { CommonProps } from './interface';
|
import { CommonProps } from './interface';
|
||||||
import { useSetInitialLanguage } from './use-set-initial-language';
|
import { useSetInitialLanguage } from './use-set-initial-language';
|
||||||
import { buildFieldNameWithPrefix } from './utils';
|
import { buildFieldNameWithPrefix } from './utils';
|
||||||
|
|
||||||
|
const tableResultTypeOptions: SelectWithSearchFlagOptionType[] = [
|
||||||
|
{ label: 'Markdown', value: '0' },
|
||||||
|
{ label: 'HTML', value: '1' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const markdownImageResponseTypeOptions: SelectWithSearchFlagOptionType[] = [
|
||||||
|
{ label: 'URL', value: '0' },
|
||||||
|
{ label: 'Text', value: '1' },
|
||||||
|
];
|
||||||
|
|
||||||
export function PdfFormFields({ prefix }: CommonProps) {
|
export function PdfFormFields({ prefix }: CommonProps) {
|
||||||
|
const { t } = useTranslation();
|
||||||
const form = useFormContext();
|
const form = useFormContext();
|
||||||
|
|
||||||
const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
|
const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
|
||||||
|
|
@ -25,6 +42,12 @@ export function PdfFormFields({ prefix }: CommonProps) {
|
||||||
);
|
);
|
||||||
}, [parseMethod]);
|
}, [parseMethod]);
|
||||||
|
|
||||||
|
const tcadpOptionsShown = useMemo(() => {
|
||||||
|
return (
|
||||||
|
!isEmpty(parseMethod) && parseMethod === ParseDocumentType.TCADPParser
|
||||||
|
);
|
||||||
|
}, [parseMethod]);
|
||||||
|
|
||||||
useSetInitialLanguage({ prefix, languageShown });
|
useSetInitialLanguage({ prefix, languageShown });
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
|
@ -36,10 +59,68 @@ export function PdfFormFields({ prefix }: CommonProps) {
|
||||||
}
|
}
|
||||||
}, [form, parseMethodName]);
|
}, [form, parseMethodName]);
|
||||||
|
|
||||||
|
// Set default values for TCADP options when TCADP is selected
|
||||||
|
useEffect(() => {
|
||||||
|
if (tcadpOptionsShown) {
|
||||||
|
const tableResultTypeName = buildFieldNameWithPrefix(
|
||||||
|
'table_result_type',
|
||||||
|
prefix,
|
||||||
|
);
|
||||||
|
const markdownImageResponseTypeName = buildFieldNameWithPrefix(
|
||||||
|
'markdown_image_response_type',
|
||||||
|
prefix,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (isEmpty(form.getValues(tableResultTypeName))) {
|
||||||
|
form.setValue(tableResultTypeName, '1', {
|
||||||
|
shouldValidate: true,
|
||||||
|
shouldDirty: true,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (isEmpty(form.getValues(markdownImageResponseTypeName))) {
|
||||||
|
form.setValue(markdownImageResponseTypeName, '1', {
|
||||||
|
shouldValidate: true,
|
||||||
|
shouldDirty: true,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, [tcadpOptionsShown, form, prefix]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<ParserMethodFormField prefix={prefix}></ParserMethodFormField>
|
<ParserMethodFormField prefix={prefix}></ParserMethodFormField>
|
||||||
{languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
|
{languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
|
||||||
|
{tcadpOptionsShown && (
|
||||||
|
<>
|
||||||
|
<RAGFlowFormItem
|
||||||
|
name={buildFieldNameWithPrefix('table_result_type', prefix)}
|
||||||
|
label={t('flow.tableResultType') || '表格返回形式'}
|
||||||
|
>
|
||||||
|
{(field) => (
|
||||||
|
<SelectWithSearch
|
||||||
|
value={field.value}
|
||||||
|
onChange={field.onChange}
|
||||||
|
options={tableResultTypeOptions}
|
||||||
|
></SelectWithSearch>
|
||||||
|
)}
|
||||||
|
</RAGFlowFormItem>
|
||||||
|
<RAGFlowFormItem
|
||||||
|
name={buildFieldNameWithPrefix(
|
||||||
|
'markdown_image_response_type',
|
||||||
|
prefix,
|
||||||
|
)}
|
||||||
|
label={t('flow.markdownImageResponseType') || '图片返回形式'}
|
||||||
|
>
|
||||||
|
{(field) => (
|
||||||
|
<SelectWithSearch
|
||||||
|
value={field.value}
|
||||||
|
onChange={field.onChange}
|
||||||
|
options={markdownImageResponseTypeOptions}
|
||||||
|
></SelectWithSearch>
|
||||||
|
)}
|
||||||
|
</RAGFlowFormItem>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
125
web/src/pages/agent/form/parser-form/ppt-form-fields.tsx
Normal file
125
web/src/pages/agent/form/parser-form/ppt-form-fields.tsx
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
|
||||||
|
import {
|
||||||
|
SelectWithSearch,
|
||||||
|
SelectWithSearchFlagOptionType,
|
||||||
|
} from '@/components/originui/select-with-search';
|
||||||
|
import { RAGFlowFormItem } from '@/components/ragflow-form';
|
||||||
|
import { isEmpty } from 'lodash';
|
||||||
|
import { useEffect, useMemo } from 'react';
|
||||||
|
import { useFormContext, useWatch } from 'react-hook-form';
|
||||||
|
import { useTranslation } from 'react-i18next';
|
||||||
|
import { ParserMethodFormField } from './common-form-fields';
|
||||||
|
import { CommonProps } from './interface';
|
||||||
|
import { buildFieldNameWithPrefix } from './utils';
|
||||||
|
|
||||||
|
/** Choices offered by the `table_result_type` select (label shown, string code stored). */
const tableResultTypeOptions: SelectWithSearchFlagOptionType[] = (
  [
    ['Markdown', '0'],
    ['HTML', '1'],
  ] as const
).map(([label, value]) => ({ label, value }));

/** Choices offered by the `markdown_image_response_type` select (label shown, string code stored). */
const markdownImageResponseTypeOptions: SelectWithSearchFlagOptionType[] = (
  [
    ['URL', '0'],
    ['Text', '1'],
  ] as const
).map(([label, value]) => ({ label, value }));
|
||||||
|
|
||||||
|
/**
 * PowerPoint-specific fields of the agent parser form.
 *
 * Renders the parse-method selector limited to DeepDOC and TCADPParser and,
 * while TCADPParser is the selected method, two additional selects for
 * `table_result_type` and `markdown_image_response_type`.
 *
 * @param prefix - Forwarded to `buildFieldNameWithPrefix` to derive every
 *   field name used by this component.
 */
export function PptFormFields({ prefix }: CommonProps) {
  const { t } = useTranslation();
  const form = useFormContext();

  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
  const selectedMethod = useWatch({ name: parseMethodName });

  // PPT only offers DeepDOC and TCADPParser as parse methods.
  const optionsWithoutLLM = [
    ParseDocumentType.DeepDOC,
    ParseDocumentType.TCADPParser,
  ].map((method) => ({ label: method, value: method }));

  const tcadpOptionsShown = useMemo(() => {
    if (isEmpty(selectedMethod)) {
      return false;
    }
    return selectedMethod === ParseDocumentType.TCADPParser;
  }, [selectedMethod]);

  // Fall back to DeepDOC when the form holds no parse method yet.
  useEffect(() => {
    if (!isEmpty(form.getValues(parseMethodName))) {
      return;
    }
    form.setValue(parseMethodName, ParseDocumentType.DeepDOC, {
      shouldValidate: true,
      shouldDirty: true,
    });
  }, [form, parseMethodName]);

  // Once TCADPParser is selected, seed each still-empty TCADP option with '1'.
  useEffect(() => {
    if (!tcadpOptionsShown) {
      return;
    }
    const tcadpFieldNames = [
      buildFieldNameWithPrefix('table_result_type', prefix),
      buildFieldNameWithPrefix('markdown_image_response_type', prefix),
    ];
    for (const fieldName of tcadpFieldNames) {
      if (isEmpty(form.getValues(fieldName))) {
        form.setValue(fieldName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
    }
  }, [tcadpOptionsShown, form, prefix]);

  const tableResultTypeField = buildFieldNameWithPrefix(
    'table_result_type',
    prefix,
  );
  const markdownImageResponseTypeField = buildFieldNameWithPrefix(
    'markdown_image_response_type',
    prefix,
  );

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={optionsWithoutLLM}
      />
      {tcadpOptionsShown && (
        <>
          <RAGFlowFormItem
            name={tableResultTypeField}
            label={t('flow.tableResultType') || '表格返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={tableResultTypeOptions}
              />
            )}
          </RAGFlowFormItem>
          <RAGFlowFormItem
            name={markdownImageResponseTypeField}
            label={t('flow.markdownImageResponseType') || '图片返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={markdownImageResponseTypeOptions}
              />
            )}
          </RAGFlowFormItem>
        </>
      )}
    </>
  );
}
|
||||||
125
web/src/pages/agent/form/parser-form/spreadsheet-form-fields.tsx
Normal file
125
web/src/pages/agent/form/parser-form/spreadsheet-form-fields.tsx
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
|
||||||
|
import {
|
||||||
|
SelectWithSearch,
|
||||||
|
SelectWithSearchFlagOptionType,
|
||||||
|
} from '@/components/originui/select-with-search';
|
||||||
|
import { RAGFlowFormItem } from '@/components/ragflow-form';
|
||||||
|
import { isEmpty } from 'lodash';
|
||||||
|
import { useEffect, useMemo } from 'react';
|
||||||
|
import { useFormContext, useWatch } from 'react-hook-form';
|
||||||
|
import { useTranslation } from 'react-i18next';
|
||||||
|
import { ParserMethodFormField } from './common-form-fields';
|
||||||
|
import { CommonProps } from './interface';
|
||||||
|
import { buildFieldNameWithPrefix } from './utils';
|
||||||
|
|
||||||
|
/** Choices offered by the `table_result_type` select (label shown, string code stored). */
const tableResultTypeOptions: SelectWithSearchFlagOptionType[] = (
  [
    ['Markdown', '0'],
    ['HTML', '1'],
  ] as const
).map(([label, value]) => ({ label, value }));

/** Choices offered by the `markdown_image_response_type` select (label shown, string code stored). */
const markdownImageResponseTypeOptions: SelectWithSearchFlagOptionType[] = (
  [
    ['URL', '0'],
    ['Text', '1'],
  ] as const
).map(([label, value]) => ({ label, value }));
|
||||||
|
|
||||||
|
/**
 * Spreadsheet-specific fields of the agent parser form.
 *
 * Renders the parse-method selector limited to DeepDOC and TCADPParser and,
 * while TCADPParser is the selected method, two additional selects for
 * `table_result_type` and `markdown_image_response_type`.
 *
 * @param prefix - Forwarded to `buildFieldNameWithPrefix` to derive every
 *   field name used by this component.
 */
export function SpreadsheetFormFields({ prefix }: CommonProps) {
  const { t } = useTranslation();
  const form = useFormContext();

  const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
  const selectedMethod = useWatch({ name: parseMethodName });

  // Spreadsheet only offers DeepDOC and TCADPParser as parse methods.
  const optionsWithoutLLM = [
    ParseDocumentType.DeepDOC,
    ParseDocumentType.TCADPParser,
  ].map((method) => ({ label: method, value: method }));

  const tcadpOptionsShown = useMemo(() => {
    if (isEmpty(selectedMethod)) {
      return false;
    }
    return selectedMethod === ParseDocumentType.TCADPParser;
  }, [selectedMethod]);

  // Fall back to DeepDOC when the form holds no parse method yet.
  useEffect(() => {
    if (!isEmpty(form.getValues(parseMethodName))) {
      return;
    }
    form.setValue(parseMethodName, ParseDocumentType.DeepDOC, {
      shouldValidate: true,
      shouldDirty: true,
    });
  }, [form, parseMethodName]);

  // Once TCADPParser is selected, seed each still-empty TCADP option with '1'.
  useEffect(() => {
    if (!tcadpOptionsShown) {
      return;
    }
    const tcadpFieldNames = [
      buildFieldNameWithPrefix('table_result_type', prefix),
      buildFieldNameWithPrefix('markdown_image_response_type', prefix),
    ];
    for (const fieldName of tcadpFieldNames) {
      if (isEmpty(form.getValues(fieldName))) {
        form.setValue(fieldName, '1', {
          shouldValidate: true,
          shouldDirty: true,
        });
      }
    }
  }, [tcadpOptionsShown, form, prefix]);

  const tableResultTypeField = buildFieldNameWithPrefix(
    'table_result_type',
    prefix,
  );
  const markdownImageResponseTypeField = buildFieldNameWithPrefix(
    'markdown_image_response_type',
    prefix,
  );

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={optionsWithoutLLM}
      />
      {tcadpOptionsShown && (
        <>
          <RAGFlowFormItem
            name={tableResultTypeField}
            label={t('flow.tableResultType') || '表格返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={tableResultTypeOptions}
              />
            )}
          </RAGFlowFormItem>
          <RAGFlowFormItem
            name={markdownImageResponseTypeField}
            label={t('flow.markdownImageResponseType') || '图片返回形式'}
          >
            {(field) => (
              <SelectWithSearch
                value={field.value}
                onChange={field.onChange}
                options={markdownImageResponseTypeOptions}
              />
            )}
          </RAGFlowFormItem>
        </>
      )}
    </>
  );
}
|
||||||
|
|
@ -317,14 +317,18 @@ export const useGetComponentLabelByValue = (nodeId: string) => {
|
||||||
return getLabel;
|
return getLabel;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Flattens grouped select options into a single list.
 *
 * Every entry of `options` is treated as a group whose `options` property
 * holds its child options; the children are concatenated in group order.
 * Entries that carry no `options` array contribute nothing instead of
 * throwing a TypeError when spread.
 *
 * @param options - Grouped options to flatten.
 * @returns A new array containing the child options of every group.
 */
export function flatOptions(options: DefaultOptionType[]) {
  // flatMap builds the result in one pass; the previous reduce re-copied the
  // accumulator for every group.
  return options.flatMap<DefaultOptionType>((group) =>
    Array.isArray(group.options) ? group.options : [],
  );
}
|
||||||
|
|
||||||
export function useFlattenQueryVariableOptions(nodeId?: string) {
|
export function useFlattenQueryVariableOptions(nodeId?: string) {
|
||||||
const { getNode } = useGraphStore((state) => state);
|
const { getNode } = useGraphStore((state) => state);
|
||||||
const nextOptions = useBuildQueryVariableOptions(getNode(nodeId));
|
const nextOptions = useBuildQueryVariableOptions(getNode(nodeId));
|
||||||
|
|
||||||
const flattenOptions = useMemo(() => {
|
const flattenOptions = useMemo(() => {
|
||||||
return nextOptions.reduce<DefaultOptionType[]>((pre, cur) => {
|
return flatOptions(nextOptions);
|
||||||
return [...pre, ...cur.options];
|
|
||||||
}, []);
|
|
||||||
}, [nextOptions]);
|
}, [nextOptions]);
|
||||||
|
|
||||||
return flattenOptions;
|
return flattenOptions;
|
||||||
|
|
|
||||||
|
|
@ -12,9 +12,9 @@ import { ReactComponent as WenCaiIcon } from '@/assets/svg/wencai.svg';
|
||||||
import { ReactComponent as WikipediaIcon } from '@/assets/svg/wikipedia.svg';
|
import { ReactComponent as WikipediaIcon } from '@/assets/svg/wikipedia.svg';
|
||||||
import { ReactComponent as YahooFinanceIcon } from '@/assets/svg/yahoo-finance.svg';
|
import { ReactComponent as YahooFinanceIcon } from '@/assets/svg/yahoo-finance.svg';
|
||||||
|
|
||||||
import { IconFont } from '@/components/icon-font';
|
import { IconFontFill } from '@/components/icon-font';
|
||||||
import { cn } from '@/lib/utils';
|
import { cn } from '@/lib/utils';
|
||||||
import { Columns3, Equal, FileCode, HousePlus, Variable } from 'lucide-react';
|
import { FileCode, HousePlus } from 'lucide-react';
|
||||||
import { Operator } from './constant';
|
import { Operator } from './constant';
|
||||||
|
|
||||||
interface IProps {
|
interface IProps {
|
||||||
|
|
@ -37,6 +37,9 @@ export const OperatorIconMap = {
|
||||||
[Operator.ExeSQL]: 'executesql-0',
|
[Operator.ExeSQL]: 'executesql-0',
|
||||||
[Operator.Invoke]: 'httprequest-0',
|
[Operator.Invoke]: 'httprequest-0',
|
||||||
[Operator.Email]: 'sendemail-0',
|
[Operator.Email]: 'sendemail-0',
|
||||||
|
[Operator.ListOperations]: 'a-listoperations',
|
||||||
|
[Operator.VariableAssigner]: 'a-ariableassigner',
|
||||||
|
[Operator.VariableAggregator]: 'aggregator',
|
||||||
};
|
};
|
||||||
|
|
||||||
export const SVGIconMap = {
|
export const SVGIconMap = {
|
||||||
|
|
@ -57,9 +60,6 @@ export const SVGIconMap = {
|
||||||
};
|
};
|
||||||
export const LucideIconMap = {
|
export const LucideIconMap = {
|
||||||
[Operator.DataOperations]: FileCode,
|
[Operator.DataOperations]: FileCode,
|
||||||
[Operator.ListOperations]: Columns3,
|
|
||||||
[Operator.VariableAssigner]: Equal,
|
|
||||||
[Operator.VariableAggregator]: Variable,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const Empty = () => {
|
const Empty = () => {
|
||||||
|
|
@ -86,7 +86,10 @@ const OperatorIcon = ({ name, className }: IProps) => {
|
||||||
|
|
||||||
if (Icon) {
|
if (Icon) {
|
||||||
return (
|
return (
|
||||||
<IconFont name={Icon} className={cn('size-5 ', className)}></IconFont>
|
<IconFontFill
|
||||||
|
name={Icon}
|
||||||
|
className={cn('size-5 ', className)}
|
||||||
|
></IconFontFill>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -214,6 +214,36 @@ function transformParserParams(params: ParserFormSchemaType) {
|
||||||
parse_method: cur.parse_method,
|
parse_method: cur.parse_method,
|
||||||
lang: cur.lang,
|
lang: cur.lang,
|
||||||
};
|
};
|
||||||
|
// Only include TCADP parameters if TCADP Parser is selected
|
||||||
|
if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
|
||||||
|
filteredSetup.table_result_type = cur.table_result_type;
|
||||||
|
filteredSetup.markdown_image_response_type =
|
||||||
|
cur.markdown_image_response_type;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case FileType.Spreadsheet:
|
||||||
|
filteredSetup = {
|
||||||
|
...filteredSetup,
|
||||||
|
parse_method: cur.parse_method,
|
||||||
|
};
|
||||||
|
// Only include TCADP parameters if TCADP Parser is selected
|
||||||
|
if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
|
||||||
|
filteredSetup.table_result_type = cur.table_result_type;
|
||||||
|
filteredSetup.markdown_image_response_type =
|
||||||
|
cur.markdown_image_response_type;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case FileType.PowerPoint:
|
||||||
|
filteredSetup = {
|
||||||
|
...filteredSetup,
|
||||||
|
parse_method: cur.parse_method,
|
||||||
|
};
|
||||||
|
// Only include TCADP parameters if TCADP Parser is selected
|
||||||
|
if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
|
||||||
|
filteredSetup.table_result_type = cur.table_result_type;
|
||||||
|
filteredSetup.markdown_image_response_type =
|
||||||
|
cur.markdown_image_response_type;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case FileType.Image:
|
case FileType.Image:
|
||||||
filteredSetup = {
|
filteredSetup = {
|
||||||
|
|
|
||||||
0
web/src/pages/data-flow/constant.tsx
Normal file
0
web/src/pages/data-flow/constant.tsx
Normal file
0
web/src/pages/data-flow/form/parser-form/index.tsx
Normal file
0
web/src/pages/data-flow/form/parser-form/index.tsx
Normal file
40
web/src/pages/data-flow/form/parser-form/ppt-form-fields.tsx
Normal file
40
web/src/pages/data-flow/form/parser-form/ppt-form-fields.tsx
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
|
||||||
|
import { isEmpty } from 'lodash';
|
||||||
|
import { useEffect } from 'react';
|
||||||
|
import { useFormContext } from 'react-hook-form';
|
||||||
|
import { ParserMethodFormField } from './common-form-fields';
|
||||||
|
import { CommonProps } from './interface';
|
||||||
|
import { buildFieldNameWithPrefix } from './utils';
|
||||||
|
|
||||||
|
/**
 * Parser-method selector shown for PowerPoint inputs.
 *
 * Restricts the selectable parsers to DeepDOC and TCADPParser and, from an
 * effect, writes DeepDOC into the form whenever the prefixed `parse_method`
 * field is still empty.
 *
 * @param prefix - Passed to `buildFieldNameWithPrefix` to build the field
 *   name, and forwarded unchanged to `ParserMethodFormField`.
 */
export function PptFormFields({ prefix }: CommonProps) {
  const formContext = useFormContext();
  const parseMethodField = buildFieldNameWithPrefix('parse_method', prefix);

  // PPT only supports DeepDOC and TCADPParser; each option uses the parser
  // name as both its label and its value.
  const supportedParsers = [
    ParseDocumentType.DeepDOC,
    ParseDocumentType.TCADPParser,
  ].map((parser) => ({ label: parser, value: parser }));

  useEffect(() => {
    const currentMethod = formContext.getValues(parseMethodField);
    if (!isEmpty(currentMethod)) {
      // A parse method is already set; leave it untouched.
      return;
    }

    formContext.setValue(parseMethodField, ParseDocumentType.DeepDOC, {
      shouldValidate: true,
      shouldDirty: true,
    });
  }, [formContext, parseMethodField]);

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={supportedParsers}
      />
    </>
  );
}
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
|
||||||
|
import { isEmpty } from 'lodash';
|
||||||
|
import { useEffect } from 'react';
|
||||||
|
import { useFormContext } from 'react-hook-form';
|
||||||
|
import { ParserMethodFormField } from './common-form-fields';
|
||||||
|
import { CommonProps } from './interface';
|
||||||
|
import { buildFieldNameWithPrefix } from './utils';
|
||||||
|
|
||||||
|
/**
 * Parser-method selector shown for spreadsheet inputs.
 *
 * Limits the choices to DeepDOC and TCADPParser. An effect fills in DeepDOC
 * when the prefixed `parse_method` form field has no value.
 *
 * @param prefix - Used with `buildFieldNameWithPrefix` to derive the field
 *   name, and handed through to `ParserMethodFormField`.
 */
export function SpreadsheetFormFields({ prefix }: CommonProps) {
  const formContext = useFormContext();
  const parseMethodField = buildFieldNameWithPrefix('parse_method', prefix);

  // Spreadsheet only supports DeepDOC and TCADPParser; label and value are
  // both the parser name.
  const supportedParsers = [
    ParseDocumentType.DeepDOC,
    ParseDocumentType.TCADPParser,
  ].map((parser) => ({ label: parser, value: parser }));

  useEffect(() => {
    const currentMethod = formContext.getValues(parseMethodField);
    if (!isEmpty(currentMethod)) {
      // Keep whatever parse method the form already holds.
      return;
    }

    formContext.setValue(parseMethodField, ParseDocumentType.DeepDOC, {
      shouldValidate: true,
      shouldDirty: true,
    });
  }, [formContext, parseMethodField]);

  return (
    <>
      <ParserMethodFormField
        prefix={prefix}
        optionsWithoutLLM={supportedParsers}
      />
    </>
  );
}
|
||||||
0
web/src/pages/data-flow/utils.ts
Normal file
0
web/src/pages/data-flow/utils.ts
Normal file
Loading…
Add table
Reference in a new issue