Merge branch 'main' of github.com:infiniflow/ragflow into mineru

This commit is contained in:
bill 2025-12-11 14:37:17 +08:00
commit 5edaa76f15
57 changed files with 2079 additions and 265 deletions

View file

@ -1,4 +1,6 @@
name: tests name: tests
permissions:
contents: read
on: on:
push: push:

View file

@ -351,7 +351,7 @@ class AdminCLI(Cmd):
def verify_admin(self, arguments: dict, single_command: bool): def verify_admin(self, arguments: dict, single_command: bool):
self.host = arguments['host'] self.host = arguments['host']
self.port = arguments['port'] self.port = arguments['port']
print(f"Attempt to access ip: {self.host}, port: {self.port}") print("Attempt to access server for admin login")
url = f"http://{self.host}:{self.port}/api/v1/admin/login" url = f"http://{self.host}:{self.port}/api/v1/admin/login"
attempt_count = 3 attempt_count = 3
@ -390,7 +390,7 @@ class AdminCLI(Cmd):
print(f"Bad responsestatus: {response.status_code}, password is wrong") print(f"Bad responsestatus: {response.status_code}, password is wrong")
except Exception as e: except Exception as e:
print(str(e)) print(str(e))
print(f"Can't access {self.host}, port: {self.port}") print("Can't access server for admin login (connection failed)")
def _format_service_detail_table(self, data): def _format_service_detail_table(self, data):
if isinstance(data, list): if isinstance(data, list):
@ -674,7 +674,7 @@ class AdminCLI(Cmd):
user_name: str = user_name_tree.children[0].strip("'\"") user_name: str = user_name_tree.children[0].strip("'\"")
password_tree: Tree = command['password'] password_tree: Tree = command['password']
password: str = password_tree.children[0].strip("'\"") password: str = password_tree.children[0].strip("'\"")
print(f"Alter user: {user_name}, password: {password}") print(f"Alter user: {user_name}, password: ******")
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password' url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
response = self.session.put(url, json={'new_password': encrypt(password)}) response = self.session.put(url, json={'new_password': encrypt(password)})
res_json = response.json() res_json = response.json()
@ -689,7 +689,7 @@ class AdminCLI(Cmd):
password_tree: Tree = command['password'] password_tree: Tree = command['password']
password: str = password_tree.children[0].strip("'\"") password: str = password_tree.children[0].strip("'\"")
role: str = command['role'] role: str = command['role']
print(f"Create user: {user_name}, password: {password}, role: {role}") print(f"Create user: {user_name}, password: ******, role: {role}")
url = f'http://{self.host}:{self.port}/api/v1/admin/users' url = f'http://{self.host}:{self.port}/api/v1/admin/users'
response = self.session.post( response = self.session.post(
url, url,
@ -951,7 +951,7 @@ def main():
args = cli.parse_connection_args(sys.argv) args = cli.parse_connection_args(sys.argv)
if 'error' in args: if 'error' in args:
print(f"Error: {args['error']}") print("Error: Invalid connection arguments")
return return
if 'command' in args: if 'command' in args:
@ -960,7 +960,7 @@ def main():
return return
if cli.verify_admin(args, single_command=True): if cli.verify_admin(args, single_command=True):
command: str = args['command'] command: str = args['command']
print(f"Run single command: {command}") # print(f"Run single command: {command}")
cli.run_single_command(command) cli.run_single_command(command)
else: else:
if cli.verify_admin(args, single_command=False): if cli.verify_admin(args, single_command=False):

View file

@ -176,11 +176,11 @@ def login_verify(f):
"message": "Access denied", "message": "Access denied",
"data": None "data": None
}), 200 }), 200
except Exception as e: except Exception:
error_msg = str(e) logging.exception("An error occurred during admin login verification.")
return jsonify({ return jsonify({
"code": 500, "code": 500,
"message": error_msg "message": "An internal server error occurred."
}), 200 }), 200
return f(*args, **kwargs) return f(*args, **kwargs)

View file

@ -136,6 +136,16 @@ class Retrieval(ToolBase, ABC):
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and"))) doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids: if not doc_ids:
doc_ids = None doc_ids = None
elif self._param.meta_data_filter.get("method") == "semi_auto":
selected_keys = self._param.meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, query)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif self._param.meta_data_filter.get("method") == "manual": elif self._param.meta_data_filter.get("method") == "manual":
filters = self._param.meta_data_filter["manual"] filters = self._param.meta_data_filter["manual"]
for flt in filters: for flt in filters:

View file

@ -342,7 +342,15 @@ async def test_db_connect():
f"UID={req['username']};" f"UID={req['username']};"
f"PWD={req['password']};" f"PWD={req['password']};"
) )
logging.info(conn_str) redacted_conn_str = (
f"DATABASE={req['database']};"
f"HOSTNAME={req['host']};"
f"PORT={req['port']};"
f"PROTOCOL=TCPIP;"
f"UID={req['username']};"
f"PWD=****;"
)
logging.info(redacted_conn_str)
conn = ibm_db.connect(conn_str, "", "") conn = ibm_db.connect(conn_str, "", "")
stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1") stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1")
ibm_db.fetch_assoc(stmt) ibm_db.fetch_assoc(stmt)

View file

@ -327,10 +327,44 @@ async def retrieval_test():
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and"))) local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids: if not local_doc_ids:
local_doc_ids = None local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(user_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual": elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids: if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"] local_doc_ids = ["-999"]
else:
meta_data_filter = req.get("meta_data_filter")
if meta_data_filter:
metas = DocumentService.get_meta_by_kbs(kb_ids)
if meta_data_filter.get("method") == "auto":
chat_mdl = LLMBundle(user_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(user_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"]
tenants = UserTenantService.query(user_id=user_id) tenants = UserTenantService.query(user_id=user_id)
for kb_id in kb_ids: for kb_id in kb_ids:

View file

@ -435,7 +435,7 @@ async def mindmap():
kb_ids.extend(req["kb_ids"]) kb_ids.extend(req["kb_ids"])
kb_ids = list(set(kb_ids)) kb_ids = list(set(kb_ids))
mind_map = gen_mindmap(req["question"], kb_ids, search_app.get("tenant_id", current_user.id), search_config) mind_map = await gen_mindmap(req["question"], kb_ids, search_app.get("tenant_id", current_user.id), search_config)
if "error" in mind_map: if "error" in mind_map:
return server_error_response(Exception(mind_map["error"])) return server_error_response(Exception(mind_map["error"]))
return get_json_result(data=mind_map) return get_json_result(data=mind_map)

View file

@ -27,6 +27,7 @@ from api.db import VALID_FILE_TYPES, FileType
from api.db.db_models import Task from api.db.db_models import Task
from api.db.services import duplicate_name from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService, doc_upload_and_parse from api.db.services.document_service import DocumentService, doc_upload_and_parse
from api.db.services.dialog_service import meta_filter, convert_conditions
from api.db.services.file2document_service import File2DocumentService from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
@ -246,9 +247,19 @@ async def list_docs():
return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}") return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")
suffix = req.get("suffix", []) suffix = req.get("suffix", [])
metadata_condition = req.get("metadata_condition", {}) or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_data_error_result(message="metadata_condition must be an object.")
doc_ids_filter = None
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([kb_id])
doc_ids_filter = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
if metadata_condition.get("conditions") and not doc_ids_filter:
return get_json_result(data={"total": 0, "docs": []})
try: try:
docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix) docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix, doc_ids_filter)
if create_time_from or create_time_to: if create_time_from or create_time_to:
filtered_docs = [] filtered_docs = []
@ -319,6 +330,87 @@ async def doc_infos():
return get_json_result(data=list(docs.dicts())) return get_json_result(data=list(docs.dicts()))
@manager.route("/metadata/summary", methods=["POST"]) # noqa: F821
@login_required
async def metadata_summary():
req = await get_request_json()
kb_id = req.get("kb_id")
if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
tenants = UserTenantService.query(user_id=current_user.id)
for tenant in tenants:
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
break
else:
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
try:
summary = DocumentService.get_metadata_summary(kb_id)
return get_json_result(data={"summary": summary})
except Exception as e:
return server_error_response(e)
@manager.route("/metadata/update", methods=["POST"]) # noqa: F821
@login_required
async def metadata_update():
req = await get_request_json()
kb_id = req.get("kb_id")
if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
tenants = UserTenantService.query(user_id=current_user.id)
for tenant in tenants:
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
break
else:
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
selector = req.get("selector", {}) or {}
updates = req.get("updates", []) or []
deletes = req.get("deletes", []) or []
if not isinstance(selector, dict):
return get_json_result(data=False, message="selector must be an object.", code=RetCode.ARGUMENT_ERROR)
if not isinstance(updates, list) or not isinstance(deletes, list):
return get_json_result(data=False, message="updates and deletes must be lists.", code=RetCode.ARGUMENT_ERROR)
metadata_condition = selector.get("metadata_condition", {}) or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_json_result(data=False, message="metadata_condition must be an object.", code=RetCode.ARGUMENT_ERROR)
document_ids = selector.get("document_ids", []) or []
if document_ids and not isinstance(document_ids, list):
return get_json_result(data=False, message="document_ids must be a list.", code=RetCode.ARGUMENT_ERROR)
for upd in updates:
if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
return get_json_result(data=False, message="Each update requires key and value.", code=RetCode.ARGUMENT_ERROR)
for d in deletes:
if not isinstance(d, dict) or not d.get("key"):
return get_json_result(data=False, message="Each delete requires key.", code=RetCode.ARGUMENT_ERROR)
kb_doc_ids = KnowledgebaseService.list_documents_by_ids([kb_id])
target_doc_ids = set(kb_doc_ids)
if document_ids:
invalid_ids = set(document_ids) - set(kb_doc_ids)
if invalid_ids:
return get_json_result(data=False, message=f"These documents do not belong to dataset {kb_id}: {', '.join(invalid_ids)}", code=RetCode.ARGUMENT_ERROR)
target_doc_ids = set(document_ids)
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([kb_id])
filtered_ids = set(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
target_doc_ids = target_doc_ids & filtered_ids
if metadata_condition.get("conditions") and not target_doc_ids:
return get_json_result(data={"updated": 0, "matched_docs": 0})
target_doc_ids = list(target_doc_ids)
updated = DocumentService.batch_update_metadata(kb_id, target_doc_ids, updates, deletes)
return get_json_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
@manager.route("/thumbnails", methods=["GET"]) # noqa: F821 @manager.route("/thumbnails", methods=["GET"]) # noqa: F821
# @login_required # @login_required
def thumbnails(): def thumbnails():
@ -698,7 +790,10 @@ async def set_meta():
if not isinstance(meta, dict): if not isinstance(meta, dict):
return get_json_result(data=False, message="Only dictionary type supported.", code=RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message="Only dictionary type supported.", code=RetCode.ARGUMENT_ERROR)
for k, v in meta.items(): for k, v in meta.items():
if not isinstance(v, str) and not isinstance(v, int) and not isinstance(v, float): if isinstance(v, list):
if not all(isinstance(i, (str, int, float)) for i in v):
return get_json_result(data=False, message=f"The type is not supported in list: {v}", code=RetCode.ARGUMENT_ERROR)
elif not isinstance(v, (str, int, float)):
return get_json_result(data=False, message=f"The type is not supported: {v}", code=RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message=f"The type is not supported: {v}", code=RetCode.ARGUMENT_ERROR)
except Exception as e: except Exception as e:
return get_json_result(data=False, message=f"Json syntax error: {e}", code=RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message=f"Json syntax error: {e}", code=RetCode.ARGUMENT_ERROR)

View file

@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
# #
import datetime import datetime
import json
import logging import logging
import pathlib import pathlib
import re import re
@ -551,13 +552,29 @@ def list_docs(dataset_id, tenant_id):
run_status = q.getlist("run") run_status = q.getlist("run")
create_time_from = int(q.get("create_time_from", 0)) create_time_from = int(q.get("create_time_from", 0))
create_time_to = int(q.get("create_time_to", 0)) create_time_to = int(q.get("create_time_to", 0))
metadata_condition_raw = q.get("metadata_condition")
metadata_condition = {}
if metadata_condition_raw:
try:
metadata_condition = json.loads(metadata_condition_raw)
except Exception:
return get_error_data_result(message="metadata_condition must be valid JSON.")
if metadata_condition and not isinstance(metadata_condition, dict):
return get_error_data_result(message="metadata_condition must be an object.")
# map run status (text or numeric) - align with API parameter # map run status (text or numeric) - align with API parameter
run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"} run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status] run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status]
doc_ids_filter = None
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([dataset_id])
doc_ids_filter = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
if metadata_condition.get("conditions") and not doc_ids_filter:
return get_result(data={"total": 0, "docs": []})
docs, total = DocumentService.get_list( docs, total = DocumentService.get_list(
dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted, doc_ids_filter
) )
# time range filter (0 means no bound) # time range filter (0 means no bound)
@ -586,6 +603,70 @@ def list_docs(dataset_id, tenant_id):
return get_result(data={"total": total, "docs": output_docs}) return get_result(data={"total": total, "docs": output_docs})
@manager.route("/datasets/<dataset_id>/metadata/summary", methods=["GET"]) # noqa: F821
@token_required
def metadata_summary(dataset_id, tenant_id):
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
try:
summary = DocumentService.get_metadata_summary(dataset_id)
return get_result(data={"summary": summary})
except Exception as e:
return server_error_response(e)
@manager.route("/datasets/<dataset_id>/metadata/update", methods=["POST"]) # noqa: F821
@token_required
async def metadata_batch_update(dataset_id, tenant_id):
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
req = await get_request_json()
selector = req.get("selector", {}) or {}
updates = req.get("updates", []) or []
deletes = req.get("deletes", []) or []
if not isinstance(selector, dict):
return get_error_data_result(message="selector must be an object.")
if not isinstance(updates, list) or not isinstance(deletes, list):
return get_error_data_result(message="updates and deletes must be lists.")
metadata_condition = selector.get("metadata_condition", {}) or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_error_data_result(message="metadata_condition must be an object.")
document_ids = selector.get("document_ids", []) or []
if document_ids and not isinstance(document_ids, list):
return get_error_data_result(message="document_ids must be a list.")
for upd in updates:
if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
return get_error_data_result(message="Each update requires key and value.")
for d in deletes:
if not isinstance(d, dict) or not d.get("key"):
return get_error_data_result(message="Each delete requires key.")
kb_doc_ids = KnowledgebaseService.list_documents_by_ids([dataset_id])
target_doc_ids = set(kb_doc_ids)
if document_ids:
invalid_ids = set(document_ids) - set(kb_doc_ids)
if invalid_ids:
return get_error_data_result(message=f"These documents do not belong to dataset {dataset_id}: {', '.join(invalid_ids)}")
target_doc_ids = set(document_ids)
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([dataset_id])
filtered_ids = set(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
target_doc_ids = target_doc_ids & filtered_ids
if metadata_condition.get("conditions") and not target_doc_ids:
return get_result(data={"updated": 0, "matched_docs": 0})
target_doc_ids = list(target_doc_ids)
updated = DocumentService.batch_update_metadata(dataset_id, target_doc_ids, updates, deletes)
return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821 @manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
@token_required @token_required
async def delete(tenant_id, dataset_id): async def delete(tenant_id, dataset_id):

View file

@ -984,10 +984,44 @@ async def retrieval_test_embedded():
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and"))) local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids: if not local_doc_ids:
local_doc_ids = None local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, _question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual": elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids: if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"] local_doc_ids = ["-999"]
else:
meta_data_filter = req.get("meta_data_filter")
if meta_data_filter:
metas = DocumentService.get_meta_by_kbs(kb_ids)
if meta_data_filter.get("method") == "auto":
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"]
tenants = UserTenantService.query(user_id=tenant_id) tenants = UserTenantService.query(user_id=tenant_id)
for kb_id in kb_ids: for kb_id in kb_ids:
@ -1135,7 +1169,7 @@ async def mindmap():
search_id = req.get("search_id", "") search_id = req.get("search_id", "")
search_app = SearchService.get_detail(search_id) if search_id else {} search_app = SearchService.get_detail(search_id) if search_id else {}
mind_map = gen_mindmap(req["question"], req["kb_ids"], tenant_id, search_app.get("search_config", {})) mind_map =await gen_mindmap(req["question"], req["kb_ids"], tenant_id, search_app.get("search_config", {}))
if "error" in mind_map: if "error" in mind_map:
return server_error_response(Exception(mind_map["error"])) return server_error_response(Exception(mind_map["error"]))
return get_json_result(data=mind_map) return get_json_result(data=mind_map)

View file

@ -73,7 +73,7 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_
UserTenantService.insert(**usr_tenant) UserTenantService.insert(**usr_tenant)
TenantLLMService.insert_many(tenant_llm) TenantLLMService.insert_many(tenant_llm)
logging.info( logging.info(
f"Super user initialized. email: {email}, password: {password}. Changing the password after login is strongly recommended.") f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.")
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"]) chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
msg = chat_mdl.chat(system="", history=[ msg = chat_mdl.chat(system="", history=[

View file

@ -273,7 +273,7 @@ def delete_user_data(user_id: str) -> dict:
except Exception as e: except Exception as e:
logging.exception(e) logging.exception(e)
return {"success": False, "message": f"Error: {str(e)}. Already done:\n{done_msg}"} return {"success": False, "message": "An internal error occurred during user deletion. Some operations may have completed.","details": done_msg}
def delete_user_agents(user_id: str) -> dict: def delete_user_agents(user_id: str) -> dict:

View file

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import asyncio
import binascii import binascii
import logging import logging
import re import re
@ -426,6 +425,15 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
attachments.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and"))) attachments.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not attachments: if not attachments:
attachments = None attachments = None
elif dialog.meta_data_filter.get("method") == "semi_auto":
selected_keys = dialog.meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, questions[-1])
attachments.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not attachments:
attachments = None
elif dialog.meta_data_filter.get("method") == "manual": elif dialog.meta_data_filter.get("method") == "manual":
conds = dialog.meta_data_filter["manual"] conds = dialog.meta_data_filter["manual"]
attachments.extend(meta_filter(metas, conds, dialog.meta_data_filter.get("logic", "and"))) attachments.extend(meta_filter(metas, conds, dialog.meta_data_filter.get("logic", "and")))
@ -835,6 +843,15 @@ async def async_ask(question, kb_ids, tenant_id, chat_llm_name=None, search_conf
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and"))) doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids: if not doc_ids:
doc_ids = None doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "manual": elif meta_data_filter.get("method") == "manual":
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not doc_ids: if meta_data_filter["manual"] and not doc_ids:
@ -887,7 +904,7 @@ async def async_ask(question, kb_ids, tenant_id, chat_llm_name=None, search_conf
yield decorate_answer(answer) yield decorate_answer(answer)
def gen_mindmap(question, kb_ids, tenant_id, search_config={}): async def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
meta_data_filter = search_config.get("meta_data_filter", {}) meta_data_filter = search_config.get("meta_data_filter", {})
doc_ids = search_config.get("doc_ids", []) doc_ids = search_config.get("doc_ids", [])
rerank_id = search_config.get("rerank_id", "") rerank_id = search_config.get("rerank_id", "")
@ -910,6 +927,15 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and"))) doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids: if not doc_ids:
doc_ids = None doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "manual": elif meta_data_filter.get("method") == "manual":
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and"))) doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not doc_ids: if meta_data_filter["manual"] and not doc_ids:
@ -931,5 +957,5 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
rank_feature=label_question(question, kbs), rank_feature=label_question(question, kbs),
) )
mindmap = MindMapExtractor(chat_mdl) mindmap = MindMapExtractor(chat_mdl)
mind_map = asyncio.run(mindmap([c["content_with_weight"] for c in ranks["chunks"]])) mind_map = await mindmap([c["content_with_weight"] for c in ranks["chunks"]])
return mind_map.output return mind_map.output

View file

@ -79,7 +79,7 @@ class DocumentService(CommonService):
@classmethod @classmethod
@DB.connection_context() @DB.connection_context()
def get_list(cls, kb_id, page_number, items_per_page, def get_list(cls, kb_id, page_number, items_per_page,
orderby, desc, keywords, id, name, suffix=None, run = None): orderby, desc, keywords, id, name, suffix=None, run = None, doc_ids=None):
fields = cls.get_cls_model_fields() fields = cls.get_cls_model_fields()
docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on = (File2Document.document_id == cls.model.id))\ docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on = (File2Document.document_id == cls.model.id))\
.join(File, on = (File.id == File2Document.file_id))\ .join(File, on = (File.id == File2Document.file_id))\
@ -96,6 +96,8 @@ class DocumentService(CommonService):
docs = docs.where( docs = docs.where(
fn.LOWER(cls.model.name).contains(keywords.lower()) fn.LOWER(cls.model.name).contains(keywords.lower())
) )
if doc_ids:
docs = docs.where(cls.model.id.in_(doc_ids))
if suffix: if suffix:
docs = docs.where(cls.model.suffix.in_(suffix)) docs = docs.where(cls.model.suffix.in_(suffix))
if run: if run:
@ -123,7 +125,7 @@ class DocumentService(CommonService):
@classmethod @classmethod
@DB.connection_context() @DB.connection_context()
def get_by_kb_id(cls, kb_id, page_number, items_per_page, def get_by_kb_id(cls, kb_id, page_number, items_per_page,
orderby, desc, keywords, run_status, types, suffix): orderby, desc, keywords, run_status, types, suffix, doc_ids=None):
fields = cls.get_cls_model_fields() fields = cls.get_cls_model_fields()
if keywords: if keywords:
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\ docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
@ -143,6 +145,8 @@ class DocumentService(CommonService):
.join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\ .join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
.where(cls.model.kb_id == kb_id) .where(cls.model.kb_id == kb_id)
if doc_ids:
docs = docs.where(cls.model.id.in_(doc_ids))
if run_status: if run_status:
docs = docs.where(cls.model.run.in_(run_status)) docs = docs.where(cls.model.run.in_(run_status))
if types: if types:
@ -644,6 +648,13 @@ class DocumentService(CommonService):
@classmethod @classmethod
@DB.connection_context() @DB.connection_context()
def get_meta_by_kbs(cls, kb_ids): def get_meta_by_kbs(cls, kb_ids):
"""
Legacy metadata aggregator (backward-compatible).
- Does NOT expand list values and a list is kept as one string key.
Example: {"tags": ["foo","bar"]} -> meta["tags"]["['foo', 'bar']"] = [doc_id]
- Expects meta_fields is a dict.
Use when existing callers rely on the old list-as-string semantics.
"""
fields = [ fields = [
cls.model.id, cls.model.id,
cls.model.meta_fields, cls.model.meta_fields,
@ -660,6 +671,162 @@ class DocumentService(CommonService):
meta[k][v].append(doc_id) meta[k][v].append(doc_id)
return meta return meta
@classmethod
@DB.connection_context()
def get_flatted_meta_by_kbs(cls, kb_ids):
    """Build a flattened metadata index for all documents in *kb_ids*.

    - Stringified JSON ``meta_fields`` are parsed when possible; rows whose
      metadata cannot be parsed, or is not a dict, are skipped.
    - List values are expanded into individual entries.
      Example: ``{"tags": ["foo","bar"], "author": "alice"}`` yields
      ``meta["tags"]["foo"] = [doc_id]``, ``meta["tags"]["bar"] = [doc_id]``,
      ``meta["author"]["alice"] = [doc_id]``.
    Prefer this over the legacy aggregator for metadata_condition filtering
    and any scenario that must respect list semantics.
    """
    rows = cls.model.select(cls.model.id, cls.model.meta_fields).where(
        cls.model.kb_id.in_(kb_ids)
    )
    meta = {}
    for row in rows:
        raw = row.meta_fields or {}
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except Exception:
                # Unparsable metadata: ignore this document entirely.
                continue
        if not isinstance(raw, dict):
            continue
        for key, value in raw.items():
            # The key bucket is created even when every value is skipped below.
            bucket = meta.setdefault(key, {})
            items = value if isinstance(value, list) else [value]
            for item in items:
                if item is None:
                    continue
                bucket.setdefault(str(item), []).append(row.id)
    return meta
@classmethod
@DB.connection_context()
def get_metadata_summary(cls, kb_id):
    """Aggregate metadata value counts across every document of a dataset.

    Stringified JSON ``meta_fields`` are parsed when possible; unparsable or
    non-dict metadata is skipped. List values count each element separately,
    and falsy values (``None``, ``""``, ``0``) are ignored.

    Returns:
        dict: ``{key: [(value, count), ...]}`` with counts sorted descending.
    """
    counts = {}
    for row in cls.model.select(cls.model.id, cls.model.meta_fields).where(cls.model.kb_id == kb_id):
        raw = row.meta_fields or {}
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except Exception:
                continue
        if not isinstance(raw, dict):
            continue
        for key, value in raw.items():
            items = value if isinstance(value, list) else [value]
            for item in items:
                if not item:
                    continue
                text = str(item)
                per_key = counts.setdefault(key, {})
                per_key[text] = per_key.get(text, 0) + 1
    return {
        key: sorted(per_key.items(), key=lambda pair: pair[1], reverse=True)
        for key, per_key in counts.items()
    }
@classmethod
@DB.connection_context()
def batch_update_metadata(cls, kb_id, doc_ids, updates=None, deletes=None):
    """Apply batched metadata updates and deletes to selected documents.

    Args:
        kb_id: Dataset id; only rows matching both ``kb_id`` and ``doc_ids``
            are touched.
        doc_ids: Document ids to update. An empty/falsy value is a no-op.
        updates: Items of ``{"key", "value", "match"(optional)}``. For list
            values, elements equal to ``match`` (default: ``value``) are
            replaced with ``value``; for scalars, the value is replaced when
            it equals ``match``.
        deletes: Items of ``{"key", "value"(optional)}``. For list values,
            elements equal to ``value`` are removed (the key is dropped when
            the list becomes empty); omitting ``value`` drops the key.

    Returns:
        int: Number of documents whose metadata actually changed.
    """
    updates = updates or []
    deletes = deletes or []
    if not doc_ids:
        return 0

    def _normalize_meta(meta):
        # Accept stringified JSON; unparsable or non-dict metadata becomes {}.
        # A deepcopy is returned so mutations never alias the model row.
        if isinstance(meta, str):
            try:
                meta = json.loads(meta)
            except Exception:
                return {}
        if not isinstance(meta, dict):
            return {}
        return deepcopy(meta)

    def _str_equal(a, b):
        # All matching is string-based, so 1 and "1" compare equal.
        return str(a) == str(b)

    def _apply_updates(meta):
        changed = False
        for upd in updates:
            key = upd.get("key")
            if not key or key not in meta:
                continue
            new_value = upd.get("value")
            # "match" defaults to the replacement value itself.
            match_value = upd.get("match", new_value)
            if isinstance(meta[key], list):
                replaced = False
                new_list = []
                for item in meta[key]:
                    # NOTE(review): a falsy match_value (0, "", None) never
                    # matches list elements here — confirm this is intended.
                    if match_value and _str_equal(item, match_value):
                        new_list.append(new_value)
                        replaced = True
                    else:
                        new_list.append(item)
                if replaced:
                    meta[key] = new_list
                    changed = True
            else:
                # Scalar branch: falsy match_value is likewise skipped.
                if not match_value:
                    continue
                if _str_equal(meta[key], match_value):
                    meta[key] = new_value
                    changed = True
        return changed

    def _apply_deletes(meta):
        changed = False
        for d in deletes:
            key = d.get("key")
            if not key or key not in meta:
                continue
            value = d.get("value", None)
            if isinstance(meta[key], list):
                if value is None:
                    # No value given: drop the whole list-valued key.
                    del meta[key]
                    changed = True
                    continue
                new_list = [item for item in meta[key] if not _str_equal(item, value)]
                if len(new_list) != len(meta[key]):
                    if new_list:
                        meta[key] = new_list
                    else:
                        # Removing the last element removes the key entirely.
                        del meta[key]
                    changed = True
            else:
                # Scalar: omitted value means unconditional key removal.
                if value is None or _str_equal(meta[key], value):
                    del meta[key]
                    changed = True
        return changed

    updated_docs = 0
    # All per-document writes commit or roll back together.
    with DB.atomic():
        rows = cls.model.select(cls.model.id, cls.model.meta_fields).where(
            (cls.model.id.in_(doc_ids)) & (cls.model.kb_id == kb_id)
        )
        for r in rows:
            meta = _normalize_meta(r.meta_fields or {})
            original_meta = deepcopy(meta)
            changed = _apply_updates(meta)
            changed = _apply_deletes(meta) or changed
            # Double-check with a deep compare: the changed flags can be true
            # even when the net result equals the original (e.g. replace with
            # an identical value).
            if changed and meta != original_meta:
                cls.model.update(
                    meta_fields=meta,
                    update_time=current_timestamp(),
                    update_date=get_format_time()
                ).where(cls.model.id == r.id).execute()
                updated_docs += 1
    return updated_docs
@classmethod @classmethod
@DB.connection_context() @DB.connection_context()
def update_progress(cls): def update_progress(cls):

View file

@ -109,7 +109,7 @@ class LLMBundle(LLM4Tenant):
llm_name = getattr(self, "llm_name", None) llm_name = getattr(self, "llm_name", None)
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) logging.error("LLMBundle.encode can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
if self.langfuse: if self.langfuse:
generation.update(usage_details={"total_tokens": used_tokens}) generation.update(usage_details={"total_tokens": used_tokens})
@ -124,7 +124,7 @@ class LLMBundle(LLM4Tenant):
emd, used_tokens = self.mdl.encode_queries(query) emd, used_tokens = self.mdl.encode_queries(query)
llm_name = getattr(self, "llm_name", None) llm_name = getattr(self, "llm_name", None)
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) logging.error("LLMBundle.encode_queries can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
if self.langfuse: if self.langfuse:
generation.update(usage_details={"total_tokens": used_tokens}) generation.update(usage_details={"total_tokens": used_tokens})

View file

@ -1110,7 +1110,10 @@ def _make_attachment_link(
) -> str | None: ) -> str | None:
download_link = "" download_link = ""
if "api.atlassian.com" in confluence_client.url: from urllib.parse import urlparse
netloc =urlparse(confluence_client.url).hostname
if netloc == "api.atlassian.com" or (netloc and netloc.endswith(".api.atlassian.com")):
# if "api.atlassian.com" in confluence_client.url:
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get # https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
if not parent_content_id: if not parent_content_id:
logging.warning( logging.warning(

View file

@ -135,7 +135,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
except ValueError as exc: except ValueError as exc:
raise ConnectorValidationError(str(exc)) from exc raise ConnectorValidationError(str(exc)) from exc
else: else:
logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.") logger.warning("[Jira] Scoped token requested but Jira base URL does not appear to be an Atlassian Cloud domain; scoped token ignored.")
user_email = credentials.get("jira_user_email") or credentials.get("username") user_email = credentials.get("jira_user_email") or credentials.get("username")
api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token") api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
@ -245,7 +245,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
while True: while True:
attempt += 1 attempt += 1
jql = self._build_jql(attempt_start, end) jql = self._build_jql(attempt_start, end)
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}") logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer})")
try: try:
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start)) return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
except Exception as exc: except Exception as exc:
@ -927,9 +927,6 @@ def main(config: dict[str, Any] | None = None) -> None:
base_url = config.get("base_url") base_url = config.get("base_url")
credentials = config.get("credentials", {}) credentials = config.get("credentials", {})
print(f"[Jira] {config=}", flush=True)
print(f"[Jira] {credentials=}", flush=True)
if not base_url: if not base_url:
raise RuntimeError("Jira base URL must be provided via config or CLI arguments.") raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))): if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):

View file

@ -16,7 +16,9 @@ import logging
import os import os
import time import time
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
from common import settings
import httpx import httpx
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -52,6 +54,55 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
return backoff_factor * (2**attempt) return backoff_factor * (2**attempt)
# List of sensitive parameters to redact from URLs before logging
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
def _redact_sensitive_url_params(url: str) -> str:
try:
parsed = urlparse(url)
if not parsed.query:
return url
clean_query = []
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
if k.lower() in _SENSITIVE_QUERY_KEYS:
clean_query.append((k, "***REDACTED***"))
else:
clean_query.append((k, v))
new_query = urlencode(clean_query, doseq=True)
redacted_url = urlunparse(parsed._replace(query=new_query))
return redacted_url
except Exception:
return url
def _is_sensitive_url(url: str) -> bool:
"""Return True if URL is one of the configured OAuth endpoints."""
# Collect known sensitive endpoint URLs from settings
oauth_urls = set()
# GitHub OAuth endpoints
try:
if settings.GITHUB_OAUTH is not None:
url_val = settings.GITHUB_OAUTH.get("url")
if url_val:
oauth_urls.add(url_val)
except Exception:
pass
# Feishu OAuth endpoints
try:
if settings.FEISHU_OAUTH is not None:
for k in ("app_access_token_url", "user_access_token_url"):
url_val = settings.FEISHU_OAUTH.get(k)
if url_val:
oauth_urls.add(url_val)
except Exception:
pass
# Defensive normalization: compare only scheme+netloc+path
url_obj = urlparse(url)
for sensitive_url in oauth_urls:
sensitive_obj = urlparse(sensitive_url)
if (url_obj.scheme, url_obj.netloc, url_obj.path) == (sensitive_obj.scheme, sensitive_obj.netloc, sensitive_obj.path):
return True
return False
async def async_request( async def async_request(
method: str, method: str,
url: str, url: str,
@ -93,20 +144,23 @@ async def async_request(
method=method, url=url, headers=headers, **kwargs method=method, url=url, headers=headers, **kwargs
) )
duration = time.monotonic() - start duration = time.monotonic() - start
log_url = "<SENSITIVE ENDPOINT>" if _is_sensitive_url else _redact_sensitive_url_params(url)
logger.debug( logger.debug(
f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s" f"async_request {method} {log_url} -> {response.status_code} in {duration:.3f}s"
) )
return response return response
except httpx.RequestError as exc: except httpx.RequestError as exc:
last_exc = exc last_exc = exc
if attempt >= retries: if attempt >= retries:
log_url = "<SENSITIVE ENDPOINT>" if _is_sensitive_url else _redact_sensitive_url_params(url)
logger.warning( logger.warning(
f"async_request exhausted retries for {method} {url}: {exc}" f"async_request exhausted retries for {method} {log_url}"
) )
raise raise
delay = _get_delay(backoff_factor, attempt) delay = _get_delay(backoff_factor, attempt)
log_url = "<SENSITIVE ENDPOINT>" if _is_sensitive_url else _redact_sensitive_url_params(url)
logger.warning( logger.warning(
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s" f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {log_url}; retrying in {delay:.2f}s"
) )
await asyncio.sleep(delay) await asyncio.sleep(delay)
raise last_exc # pragma: no cover raise last_exc # pragma: no cover

View file

@ -369,6 +369,13 @@
"model_type": "chat", "model_type": "chat",
"is_tools": true "is_tools": true
}, },
{
"llm_name": "deepseek-v3.2",
"tags": "LLM,CHAT,128K",
"max_tokens": 128000,
"model_type": "chat",
"is_tools": true
},
{ {
"llm_name": "deepseek-r1", "llm_name": "deepseek-r1",
"tags": "LLM,CHAT,64K", "tags": "LLM,CHAT,64K",

View file

@ -106,7 +106,7 @@ class MinerUParser(RAGFlowPdfParser):
def check_installation(self, backend: str = "pipeline", server_url: Optional[str] = None) -> tuple[bool, str]: def check_installation(self, backend: str = "pipeline", server_url: Optional[str] = None) -> tuple[bool, str]:
reason = "" reason = ""
valid_backends = ["pipeline", "vlm-http-client", "vlm-transformers", "vlm-vllm-engine"] valid_backends = ["pipeline", "vlm-http-client", "vlm-transformers", "vlm-vllm-engine", "vlm-mlx-engine"]
if backend not in valid_backends: if backend not in valid_backends:
reason = "[MinerU] Invalid backend '{backend}'. Valid backends are: {valid_backends}" reason = "[MinerU] Invalid backend '{backend}'. Valid backends are: {valid_backends}"
self.logger.warning(reason) self.logger.warning(reason)

View file

@ -1477,7 +1477,7 @@ Failure:
### List documents ### List documents
**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}` **GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}&metadata_condition={json}`
Lists documents in a specified dataset. Lists documents in a specified dataset.
@ -1492,6 +1492,7 @@ Lists documents in a specified dataset.
##### Request examples ##### Request examples
**A basic request with pagination:** **A basic request with pagination:**
```bash ```bash
curl --request GET \ curl --request GET \
--url http://{address}/api/v1/datasets/{dataset_id}/documents?page=1&page_size=10 \ --url http://{address}/api/v1/datasets/{dataset_id}/documents?page=1&page_size=10 \
@ -1534,6 +1535,11 @@ curl --request GET \
- `3` / `DONE`: Document processing completed successfully - `3` / `DONE`: Document processing completed successfully
- `4` / `FAIL`: Document processing failed - `4` / `FAIL`: Document processing failed
Defaults to all statuses. Defaults to all statuses.
- `metadata_condition`: (*Filter parameter*), `object` (JSON in query)
Optional metadata filter applied to documents when `document_ids` is not provided. Uses the same structure as retrieval:
- `logic`: `"and"` (default) or `"or"`
- `conditions`: array of `{ "name": string, "comparison_operator": string, "value": string }`
- `comparison_operator` supports: `is`, `not is`, `contains`, `not contains`, `in`, `not in`, `start with`, `end with`, `>`, `<`, `≥`, `≤`, `empty`, `not empty`
##### Usage examples ##### Usage examples
@ -1545,6 +1551,15 @@ curl --request GET \
--header 'Authorization: Bearer <YOUR_API_KEY>' --header 'Authorization: Bearer <YOUR_API_KEY>'
``` ```
**Filter by metadata (query JSON):**
```bash
curl -G \
--url "http://localhost:9222/api/v1/datasets/{{KB_ID}}/documents" \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data-urlencode 'metadata_condition={"logic":"and","conditions":[{"name":"tags","comparison_operator":"is","value":"bar"},{"name":"author","comparison_operator":"is","value":"alice"}]}'
```
#### Response #### Response
Success: Success:
@ -2088,6 +2103,108 @@ Failure:
--- ---
### Dataset metadata summary
**GET** `/api/v1/datasets/{dataset_id}/metadata/summary`
Aggregates metadata values across all documents in a dataset.
#### Request
- Method: GET
- URL: `/api/v1/datasets/{dataset_id}/metadata/summary`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Response
Success:
```json
{
"code": 0,
"data": {
"summary": {
"tags": [["bar", 2], ["foo", 1], ["baz", 1]],
"author": [["alice", 2], ["bob", 1]]
}
}
}
```
---
### Dataset metadata update
**POST** `/api/v1/datasets/{dataset_id}/metadata/update`
Batch update or delete document-level metadata in a dataset. If both `document_ids` and `metadata_condition` are omitted, all documents in the dataset are selected. When both are provided, the intersection is used.
#### Request
- Method: POST
- URL: `/api/v1/datasets/{dataset_id}/metadata/update`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `selector`: `object`, optional
- `document_ids`: `list[string]`, optional
- `metadata_condition`: `object`, optional
- `logic`: `"and"` (default) or `"or"`
- `conditions`: array of `{ "name": string, "comparison_operator": string, "value": string }`
- `comparison_operator` supports: `is`, `not is`, `contains`, `not contains`, `in`, `not in`, `start with`, `end with`, `>`, `<`, `≥`, `≤`, `empty`, `not empty`
- `updates`: `array`, optional
- items: `{ "key": string, "value": any, "match": any (optional) }`
- For lists: replace elements equal to `match` (or `value` when `match` omitted) with `value`.
- For scalars: replace when current value equals `match` (or `value` when `match` omitted).
- `deletes`: `array`, optional
- items: `{ "key": string, "value": any (optional) }`
- For lists: remove elements equal to `value`; if list becomes empty, remove the key.
- For scalars: remove the key when `value` matches or when `value` is omitted.
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/datasets/{dataset_id}/metadata/update \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"selector": {
"metadata_condition": {
"logic": "and",
"conditions": [
{"name": "author", "comparison_operator": "is", "value": "alice"}
]
}
},
"updates": [
{"key": "tags", "match": "foo", "value": "foo_new"}
],
"deletes": [
{"key": "obsolete_key"},
{"key": "author", "value": "alice"}
]
}'
```
##### Response
Success:
```json
{
"code": 0,
"data": {
"updated": 1,
"matched_docs": 2
}
}
```
---
### Retrieve chunks ### Retrieve chunks
**POST** `/api/v1/retrieval` **POST** `/api/v1/retrieval`
@ -2117,6 +2234,7 @@ Retrieves chunks from specified datasets.
- `"metadata_condition"`: `object` - `"metadata_condition"`: `object`
- `"use_kg"`: `boolean` - `"use_kg"`: `boolean`
- `"toc_enhance"`: `boolean` - `"toc_enhance"`: `boolean`
##### Request example ##### Request example
```bash ```bash
@ -2189,7 +2307,7 @@ curl --request POST \
- `"conditions"`: (*Body parameter*), `array` - `"conditions"`: (*Body parameter*), `array`
A list of metadata filter conditions. A list of metadata filter conditions.
- `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details. - `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details.
- `comparison_operator`: `string` - The comparison operator. Can be one of: - `comparison_operator`: `string` - The comparison operator. Can be one of:
- `"contains"` - `"contains"`
- `"not contains"` - `"not contains"`
- `"start with"` - `"start with"`
@ -2203,7 +2321,6 @@ curl --request POST \
- `"≤"` - `"≤"`
- `"value"`: `string` - The value to compare. - `"value"`: `string` - The value to compare.
#### Response #### Response
Success: Success:
@ -4450,7 +4567,9 @@ Failure:
--- ---
### System ### System
--- ---
### Check system health ### Check system health
**GET** `/v1/system/healthz` **GET** `/v1/system/healthz`
@ -4519,6 +4638,7 @@ Content-Type: application/json
``` ```
Explanation: Explanation:
- Each service is reported as "ok" or "nok". - Each service is reported as "ok" or "nok".
- The top-level `status` reflects overall health. - The top-level `status` reflects overall health.
- If any service is "nok", detailed error info appears in `_meta`. - If any service is "nok", detailed error info appears in `_meta`.

View file

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import asyncio
import json import json
import logging import logging
from collections import defaultdict from collections import defaultdict
@ -44,7 +43,7 @@ class KGSearch(Dealer):
return response return response
def query_rewrite(self, llm, question, idxnms, kb_ids): def query_rewrite(self, llm, question, idxnms, kb_ids):
ty2ents = asyncio.run(get_entity_type2samples(idxnms, kb_ids)) ty2ents = get_entity_type2samples(idxnms, kb_ids)
hint_prompt = PROMPTS["minirag_query2kwd"].format(query=question, hint_prompt = PROMPTS["minirag_query2kwd"].format(query=question,
TYPE_POOL=json.dumps(ty2ents, ensure_ascii=False, indent=2)) TYPE_POOL=json.dumps(ty2ents, ensure_ascii=False, indent=2))
result = self._chat(llm, hint_prompt, [{"role": "user", "content": "Output:"}], {}) result = self._chat(llm, hint_prompt, [{"role": "user", "content": "Output:"}], {})

View file

@ -626,8 +626,8 @@ def merge_tuples(list1, list2):
return result return result
async def get_entity_type2samples(idxnms, kb_ids: list): def get_entity_type2samples(idxnms, kb_ids: list):
es_res = await asyncio.to_thread(settings.retriever.search,{"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]},idxnms,kb_ids) es_res = settings.retriever.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]},idxnms,kb_ids)
res = defaultdict(list) res = defaultdict(list)
for id in es_res.ids: for id in es_res.ids:

View file

@ -714,4 +714,4 @@ async def main():
if __name__ == "__main__": if __name__ == "__main__":
faulthandler.enable() faulthandler.enable()
init_root_logger(CONSUMER_NAME) init_root_logger(CONSUMER_NAME)
asyncio.run(main) asyncio.run(main())

View file

@ -17,13 +17,16 @@ import json
import logging import logging
import os import os
import re import re
import threading
import time import time
from typing import Any, Optional from typing import Any, Optional
import numpy as np
from elasticsearch_dsl import Q, Search from elasticsearch_dsl import Q, Search
from pydantic import BaseModel from pydantic import BaseModel
from pymysql.converters import escape_string from pymysql.converters import escape_string
from pyobvector import ObVecClient, FtsIndexParam, FtsParser, ARRAY, VECTOR from pyobvector import ObVecClient, FtsIndexParam, FtsParser, ARRAY, VECTOR
from pyobvector.client import ClusterVersionException
from pyobvector.client.hybrid_search import HybridSearch from pyobvector.client.hybrid_search import HybridSearch
from pyobvector.util import ObVersion from pyobvector.util import ObVersion
from sqlalchemy import text, Column, String, Integer, JSON, Double, Row, Table from sqlalchemy import text, Column, String, Integer, JSON, Double, Row, Table
@ -106,17 +109,6 @@ index_columns: list[str] = [
"removed_kwd", "removed_kwd",
] ]
fulltext_search_columns: list[str] = [
"docnm_kwd",
"content_with_weight",
"title_tks",
"title_sm_tks",
"important_tks",
"question_tks",
"content_ltks",
"content_sm_ltks"
]
fts_columns_origin: list[str] = [ fts_columns_origin: list[str] = [
"docnm_kwd^10", "docnm_kwd^10",
"content_with_weight", "content_with_weight",
@ -138,7 +130,7 @@ fulltext_index_name_template = "fts_idx_%s"
# MATCH AGAINST: https://www.oceanbase.com/docs/common-oceanbase-database-cn-1000000002017607 # MATCH AGAINST: https://www.oceanbase.com/docs/common-oceanbase-database-cn-1000000002017607
fulltext_search_template = "MATCH (%s) AGAINST ('%s' IN NATURAL LANGUAGE MODE)" fulltext_search_template = "MATCH (%s) AGAINST ('%s' IN NATURAL LANGUAGE MODE)"
# cosine_distance: https://www.oceanbase.com/docs/common-oceanbase-database-cn-1000000002012938 # cosine_distance: https://www.oceanbase.com/docs/common-oceanbase-database-cn-1000000002012938
vector_search_template = "cosine_distance(%s, %s)" vector_search_template = "cosine_distance(%s, '%s')"
class SearchResult(BaseModel): class SearchResult(BaseModel):
@ -362,18 +354,28 @@ class OBConnection(DocStoreConnection):
port = mysql_config.get("port", 2881) port = mysql_config.get("port", 2881)
self.username = mysql_config.get("user", "root@test") self.username = mysql_config.get("user", "root@test")
self.password = mysql_config.get("password", "infini_rag_flow") self.password = mysql_config.get("password", "infini_rag_flow")
max_connections = mysql_config.get("max_connections", 300)
else: else:
logger.info("Use customized config to create OceanBase connection.") logger.info("Use customized config to create OceanBase connection.")
host = ob_config.get("host", "localhost") host = ob_config.get("host", "localhost")
port = ob_config.get("port", 2881) port = ob_config.get("port", 2881)
self.username = ob_config.get("user", "root@test") self.username = ob_config.get("user", "root@test")
self.password = ob_config.get("password", "infini_rag_flow") self.password = ob_config.get("password", "infini_rag_flow")
max_connections = ob_config.get("max_connections", 300)
self.db_name = ob_config.get("db_name", "test") self.db_name = ob_config.get("db_name", "test")
self.uri = f"{host}:{port}" self.uri = f"{host}:{port}"
logger.info(f"Use OceanBase '{self.uri}' as the doc engine.") logger.info(f"Use OceanBase '{self.uri}' as the doc engine.")
# Set the maximum number of connections that can be created above the pool_size.
# By default, this is half of max_connections, but at least 10.
# This allows the pool to handle temporary spikes in demand without exhausting resources.
max_overflow = int(os.environ.get("OB_MAX_OVERFLOW", max(max_connections // 2, 10)))
# Set the number of seconds to wait before giving up when trying to get a connection from the pool.
# Default is 30 seconds, but can be overridden with the OB_POOL_TIMEOUT environment variable.
pool_timeout = int(os.environ.get("OB_POOL_TIMEOUT", "30"))
for _ in range(ATTEMPT_TIME): for _ in range(ATTEMPT_TIME):
try: try:
self.client = ObVecClient( self.client = ObVecClient(
@ -383,6 +385,9 @@ class OBConnection(DocStoreConnection):
db_name=self.db_name, db_name=self.db_name,
pool_pre_ping=True, pool_pre_ping=True,
pool_recycle=3600, pool_recycle=3600,
pool_size=max_connections,
max_overflow=max_overflow,
pool_timeout=pool_timeout,
) )
break break
except Exception as e: except Exception as e:
@ -398,6 +403,37 @@ class OBConnection(DocStoreConnection):
self._check_ob_version() self._check_ob_version()
self._try_to_update_ob_query_timeout() self._try_to_update_ob_query_timeout()
self.es = None
if self.enable_hybrid_search:
try:
self.es = HybridSearch(
uri=self.uri,
user=self.username,
password=self.password,
db_name=self.db_name,
pool_pre_ping=True,
pool_recycle=3600,
pool_size=max_connections,
max_overflow=max_overflow,
pool_timeout=pool_timeout,
)
logger.info("OceanBase Hybrid Search feature is enabled")
except ClusterVersionException as e:
logger.info("Failed to initialize HybridSearch client, fallback to use SQL", exc_info=e)
self.es = None
if self.es is not None and self.search_original_content:
logger.info("HybridSearch is enabled, forcing search_original_content to False")
self.search_original_content = False
# Determine which columns to use for full-text search dynamically:
# If HybridSearch is enabled (self.es is not None), we must use tokenized columns (fts_columns_tks)
# for compatibility and performance with HybridSearch. Otherwise, we use the original content columns
# (fts_columns_origin), which may be controlled by an environment variable.
self.fulltext_search_columns = fts_columns_origin if self.search_original_content else fts_columns_tks
self._table_exists_cache: set[str] = set()
self._table_exists_cache_lock = threading.RLock()
logger.info(f"OceanBase {self.uri} is healthy.") logger.info(f"OceanBase {self.uri} is healthy.")
def _check_ob_version(self): def _check_ob_version(self):
@ -417,18 +453,6 @@ class OBConnection(DocStoreConnection):
f"The version of OceanBase needs to be higher than or equal to 4.3.5.1, current version is {version_str}" f"The version of OceanBase needs to be higher than or equal to 4.3.5.1, current version is {version_str}"
) )
self.es = None
if not ob_version < ObVersion.from_db_version_nums(4, 4, 1, 0) and self.enable_hybrid_search:
self.es = HybridSearch(
uri=self.uri,
user=self.username,
password=self.password,
db_name=self.db_name,
pool_pre_ping=True,
pool_recycle=3600,
)
logger.info("OceanBase Hybrid Search feature is enabled")
def _try_to_update_ob_query_timeout(self): def _try_to_update_ob_query_timeout(self):
try: try:
val = self._get_variable_value("ob_query_timeout") val = self._get_variable_value("ob_query_timeout")
@ -455,9 +479,19 @@ class OBConnection(DocStoreConnection):
return os.getenv(var, default).lower() in ['true', '1', 'yes', 'y'] return os.getenv(var, default).lower() in ['true', '1', 'yes', 'y']
self.enable_fulltext_search = is_true('ENABLE_FULLTEXT_SEARCH', 'true') self.enable_fulltext_search = is_true('ENABLE_FULLTEXT_SEARCH', 'true')
logger.info(f"ENABLE_FULLTEXT_SEARCH={self.enable_fulltext_search}")
self.use_fulltext_hint = is_true('USE_FULLTEXT_HINT', 'true') self.use_fulltext_hint = is_true('USE_FULLTEXT_HINT', 'true')
logger.info(f"USE_FULLTEXT_HINT={self.use_fulltext_hint}")
self.search_original_content = is_true("SEARCH_ORIGINAL_CONTENT", 'true') self.search_original_content = is_true("SEARCH_ORIGINAL_CONTENT", 'true')
logger.info(f"SEARCH_ORIGINAL_CONTENT={self.search_original_content}")
self.enable_hybrid_search = is_true('ENABLE_HYBRID_SEARCH', 'false') self.enable_hybrid_search = is_true('ENABLE_HYBRID_SEARCH', 'false')
logger.info(f"ENABLE_HYBRID_SEARCH={self.enable_hybrid_search}")
self.use_fulltext_first_fusion_search = is_true('USE_FULLTEXT_FIRST_FUSION_SEARCH', 'true')
logger.info(f"USE_FULLTEXT_FIRST_FUSION_SEARCH={self.use_fulltext_first_fusion_search}")
""" """
Database operations Database operations
@ -478,6 +512,43 @@ class OBConnection(DocStoreConnection):
return row[1] return row[1]
raise Exception(f"Variable '{var_name}' not found.") raise Exception(f"Variable '{var_name}' not found.")
def _check_table_exists_cached(self, table_name: str) -> bool:
"""
Check table existence with cache to reduce INFORMATION_SCHEMA queries under high concurrency.
Only caches when table exists. Does not cache when table does not exist.
Thread-safe implementation: read operations are lock-free (GIL-protected),
write operations are protected by RLock to ensure cache consistency.
Args:
table_name: Table name
Returns:
Whether the table exists with all required indexes and columns
"""
if table_name in self._table_exists_cache:
return True
try:
if not self.client.check_table_exists(table_name):
return False
for column_name in index_columns:
if not self._index_exists(table_name, index_name_template % (table_name, column_name)):
return False
for fts_column in self.fulltext_search_columns:
column_name = fts_column.split("^")[0]
if not self._index_exists(table_name, fulltext_index_name_template % column_name):
return False
for column in [column_order_id, column_group_id]:
if not self._column_exist(table_name, column.name):
return False
except Exception as e:
raise Exception(f"OBConnection._check_table_exists_cached error: {str(e)}")
with self._table_exists_cache_lock:
if table_name not in self._table_exists_cache:
self._table_exists_cache.add(table_name)
return True
""" """
Table operations Table operations
""" """
@ -500,8 +571,7 @@ class OBConnection(DocStoreConnection):
process_func=lambda: self._add_index(indexName, column_name), process_func=lambda: self._add_index(indexName, column_name),
) )
fts_columns = fts_columns_origin if self.search_original_content else fts_columns_tks for fts_column in self.fulltext_search_columns:
for fts_column in fts_columns:
column_name = fts_column.split("^")[0] column_name = fts_column.split("^")[0]
_try_with_lock( _try_with_lock(
lock_name=f"ob_add_fulltext_idx_{indexName}_{column_name}", lock_name=f"ob_add_fulltext_idx_{indexName}_{column_name}",
@ -546,24 +616,7 @@ class OBConnection(DocStoreConnection):
raise Exception(f"OBConnection.deleteIndex error: {str(e)}") raise Exception(f"OBConnection.deleteIndex error: {str(e)}")
def indexExist(self, indexName: str, knowledgebaseId: str = None) -> bool: def indexExist(self, indexName: str, knowledgebaseId: str = None) -> bool:
try: return self._check_table_exists_cached(indexName)
if not self.client.check_table_exists(indexName):
return False
for column_name in index_columns:
if not self._index_exists(indexName, index_name_template % (indexName, column_name)):
return False
fts_columns = fts_columns_origin if self.search_original_content else fts_columns_tks
for fts_column in fts_columns:
column_name = fts_column.split("^")[0]
if not self._index_exists(indexName, fulltext_index_name_template % column_name):
return False
for column in [column_order_id, column_group_id]:
if not self._column_exist(indexName, column.name):
return False
except Exception as e:
raise Exception(f"OBConnection.indexExist error: {str(e)}")
return True
def _get_count(self, table_name: str, filter_list: list[str] = None) -> int: def _get_count(self, table_name: str, filter_list: list[str] = None) -> int:
where_clause = "WHERE " + " AND ".join(filter_list) if len(filter_list) > 0 else "" where_clause = "WHERE " + " AND ".join(filter_list) if len(filter_list) > 0 else ""
@ -853,10 +906,8 @@ class OBConnection(DocStoreConnection):
fulltext_query = escape_string(fulltext_query.strip()) fulltext_query = escape_string(fulltext_query.strip())
fulltext_topn = m.topn fulltext_topn = m.topn
fts_columns = fts_columns_origin if self.search_original_content else fts_columns_tks
# get fulltext match expression and weight values # get fulltext match expression and weight values
for field in fts_columns: for field in self.fulltext_search_columns:
parts = field.split("^") parts = field.split("^")
column_name: str = parts[0] column_name: str = parts[0]
column_weight: float = float(parts[1]) if (len(parts) > 1 and parts[1]) else 1.0 column_weight: float = float(parts[1]) if (len(parts) > 1 and parts[1]) else 1.0
@ -885,7 +936,8 @@ class OBConnection(DocStoreConnection):
fulltext_search_score_expr = f"({' + '.join(f'{expr} * {fulltext_search_weight.get(col, 0)}' for col, expr in fulltext_search_expr.items())})" fulltext_search_score_expr = f"({' + '.join(f'{expr} * {fulltext_search_weight.get(col, 0)}' for col, expr in fulltext_search_expr.items())})"
if vector_data: if vector_data:
vector_search_expr = vector_search_template % (vector_column_name, vector_data) vector_data_str = "[" + ",".join([str(np.float32(v)) for v in vector_data]) + "]"
vector_search_expr = vector_search_template % (vector_column_name, vector_data_str)
# use (1 - cosine_distance) as score, which should be [-1, 1] # use (1 - cosine_distance) as score, which should be [-1, 1]
# https://www.oceanbase.com/docs/common-oceanbase-database-standalone-1000000003577323 # https://www.oceanbase.com/docs/common-oceanbase-database-standalone-1000000003577323
vector_search_score_expr = f"(1 - {vector_search_expr})" vector_search_score_expr = f"(1 - {vector_search_expr})"
@ -910,11 +962,15 @@ class OBConnection(DocStoreConnection):
if search_type in ["fusion", "fulltext", "vector"] and "_score" not in output_fields: if search_type in ["fusion", "fulltext", "vector"] and "_score" not in output_fields:
output_fields.append("_score") output_fields.append("_score")
group_results = kwargs.get("group_results", False) if limit:
if vector_topn is not None:
limit = min(vector_topn, limit)
if fulltext_topn is not None:
limit = min(fulltext_topn, limit)
for index_name in indexNames: for index_name in indexNames:
if not self.client.check_table_exists(index_name): if not self._check_table_exists_cached(index_name):
continue continue
fulltext_search_hint = f"/*+ UNION_MERGE({index_name} {' '.join(fulltext_search_idx_list)}) */" if self.use_fulltext_hint else "" fulltext_search_hint = f"/*+ UNION_MERGE({index_name} {' '.join(fulltext_search_idx_list)}) */" if self.use_fulltext_hint else ""
@ -922,29 +978,7 @@ class OBConnection(DocStoreConnection):
if search_type == "fusion": if search_type == "fusion":
# fusion search, usually for chat # fusion search, usually for chat
num_candidates = vector_topn + fulltext_topn num_candidates = vector_topn + fulltext_topn
if group_results: if self.use_fulltext_first_fusion_search:
count_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY relevance DESC"
f" LIMIT {num_candidates}"
f"),"
f" scored_results AS ("
f" SELECT *"
f" FROM fulltext_results"
f" WHERE {vector_search_filter}"
f"),"
f" group_results AS ("
f" SELECT *, ROW_NUMBER() OVER (PARTITION BY group_id) as rn"
f" FROM scored_results"
f")"
f" SELECT COUNT(*)"
f" FROM group_results"
f" WHERE rn = 1"
)
else:
count_sql = ( count_sql = (
f"WITH fulltext_results AS (" f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance" f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
@ -955,6 +989,22 @@ class OBConnection(DocStoreConnection):
f")" f")"
f" SELECT COUNT(*) FROM fulltext_results WHERE {vector_search_filter}" f" SELECT COUNT(*) FROM fulltext_results WHERE {vector_search_filter}"
) )
else:
count_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} id FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY {fulltext_search_score_expr}"
f" LIMIT {fulltext_topn}"
f"),"
f"vector_results AS ("
f" SELECT id FROM {index_name}"
f" WHERE {filters_expr} AND {vector_search_filter}"
f" ORDER BY {vector_search_expr}"
f" APPROXIMATE LIMIT {vector_topn}"
f")"
f" SELECT COUNT(*) FROM fulltext_results f FULL OUTER JOIN vector_results v ON f.id = v.id"
)
logger.debug("OBConnection.search with count sql: %s", count_sql) logger.debug("OBConnection.search with count sql: %s", count_sql)
start_time = time.time() start_time = time.time()
@ -976,32 +1026,8 @@ class OBConnection(DocStoreConnection):
if total_count == 0: if total_count == 0:
continue continue
score_expr = f"(relevance * {1 - vector_similarity_weight} + {vector_search_score_expr} * {vector_similarity_weight} + {pagerank_score_expr})" if self.use_fulltext_first_fusion_search:
if group_results: score_expr = f"(relevance * {1 - vector_similarity_weight} + {vector_search_score_expr} * {vector_similarity_weight} + {pagerank_score_expr})"
fusion_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY relevance DESC"
f" LIMIT {num_candidates}"
f"),"
f" scored_results AS ("
f" SELECT *, {score_expr} AS _score"
f" FROM fulltext_results"
f" WHERE {vector_search_filter}"
f"),"
f" group_results AS ("
f" SELECT *, ROW_NUMBER() OVER (PARTITION BY group_id ORDER BY _score DESC) as rn"
f" FROM scored_results"
f")"
f" SELECT {fields_expr}, _score"
f" FROM group_results"
f" WHERE rn = 1"
f" ORDER BY _score DESC"
f" LIMIT {offset}, {limit}"
)
else:
fusion_sql = ( fusion_sql = (
f"WITH fulltext_results AS (" f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance" f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
@ -1016,6 +1042,38 @@ class OBConnection(DocStoreConnection):
f" ORDER BY _score DESC" f" ORDER BY _score DESC"
f" LIMIT {offset}, {limit}" f" LIMIT {offset}, {limit}"
) )
else:
pagerank_score_expr = f"(CAST(IFNULL(f.{PAGERANK_FLD}, 0) AS DECIMAL(10, 2)) / 100)"
score_expr = f"(f.relevance * {1 - vector_similarity_weight} + v.similarity * {vector_similarity_weight} + {pagerank_score_expr})"
fields_expr = ", ".join([f"t.{f} as {f}" for f in output_fields if f != "_score"])
fusion_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} id, pagerank_fea, {fulltext_search_score_expr} AS relevance"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY relevance DESC"
f" LIMIT {fulltext_topn}"
f"),"
f"vector_results AS ("
f" SELECT id, pagerank_fea, {vector_search_score_expr} AS similarity"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {vector_search_filter}"
f" ORDER BY {vector_search_expr}"
f" APPROXIMATE LIMIT {vector_topn}"
f"),"
f"combined_results AS ("
f" SELECT COALESCE(f.id, v.id) AS id, {score_expr} AS score"
f" FROM fulltext_results f"
f" FULL OUTER JOIN vector_results v"
f" ON f.id = v.id"
f")"
f" SELECT {fields_expr}, c.score as _score"
f" FROM combined_results c"
f" JOIN {index_name} t"
f" ON c.id = t.id"
f" ORDER BY score DESC"
f" LIMIT {offset}, {limit}"
)
logger.debug("OBConnection.search with fusion sql: %s", fusion_sql) logger.debug("OBConnection.search with fusion sql: %s", fusion_sql)
start_time = time.time() start_time = time.time()
@ -1234,10 +1292,14 @@ class OBConnection(DocStoreConnection):
for row in rows: for row in rows:
result.chunks.append(self._row_to_entity(row, output_fields)) result.chunks.append(self._row_to_entity(row, output_fields))
if result.total == 0:
result.total = len(result.chunks)
return result return result
def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None: def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
if not self.client.check_table_exists(indexName): if not self._check_table_exists_cached(indexName):
return None return None
try: try:
@ -1336,7 +1398,7 @@ class OBConnection(DocStoreConnection):
return res return res
def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str) -> bool: def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str) -> bool:
if not self.client.check_table_exists(indexName): if not self._check_table_exists_cached(indexName):
return True return True
condition["kb_id"] = knowledgebaseId condition["kb_id"] = knowledgebaseId
@ -1387,7 +1449,7 @@ class OBConnection(DocStoreConnection):
return False return False
def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int: def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int:
if not self.client.check_table_exists(indexName): if not self._check_table_exists_cached(indexName):
return 0 return 0
condition["kb_id"] = knowledgebaseId condition["kb_id"] = knowledgebaseId

View file

@ -41,7 +41,9 @@ def get_opendal_config():
scheme = opendal_config.get("scheme") scheme = opendal_config.get("scheme")
config_data = opendal_config.get("config", {}) config_data = opendal_config.get("config", {})
kwargs = {"scheme": scheme, **config_data} kwargs = {"scheme": scheme, **config_data}
logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs) safe_log_keys=['scheme', 'host', 'port', 'database', 'table']
loggable_kwargs = {k: v for k, v in kwargs.items() if k in safe_log_keys}
logging.info("Loaded OpenDAL configuration(non sensitive): %s", loggable_kwargs)
return kwargs return kwargs
except Exception as e: except Exception as e:
logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e)) logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))

View file

@ -20,6 +20,7 @@ import { CirclePlus, HelpCircle, Info } from 'lucide-react';
import { useId, useState, type FC, type FormEvent } from 'react'; import { useId, useState, type FC, type FormEvent } from 'react';
import { useTranslation } from '../../hooks/use-translation'; import { useTranslation } from '../../hooks/use-translation';
import type { NewField, SchemaType } from '../../types/json-schema'; import type { NewField, SchemaType } from '../../types/json-schema';
import { KeyInputProps } from './interface';
import SchemaTypeSelector from './schema-type-selector'; import SchemaTypeSelector from './schema-type-selector';
interface AddFieldButtonProps { interface AddFieldButtonProps {
@ -27,9 +28,10 @@ interface AddFieldButtonProps {
variant?: 'primary' | 'secondary'; variant?: 'primary' | 'secondary';
} }
const AddFieldButton: FC<AddFieldButtonProps> = ({ const AddFieldButton: FC<AddFieldButtonProps & KeyInputProps> = ({
onAddField, onAddField,
variant = 'primary', variant = 'primary',
pattern,
}) => { }) => {
const [dialogOpen, setDialogOpen] = useState(false); const [dialogOpen, setDialogOpen] = useState(false);
const [fieldName, setFieldName] = useState(''); const [fieldName, setFieldName] = useState('');
@ -120,6 +122,7 @@ const AddFieldButton: FC<AddFieldButtonProps> = ({
placeholder={t.fieldNamePlaceholder} placeholder={t.fieldNamePlaceholder}
className="font-mono text-sm w-full" className="font-mono text-sm w-full"
required required
searchValue={pattern}
/> />
</div> </div>

View file

@ -0,0 +1,9 @@
import React, { useContext } from 'react';
import { KeyInputProps } from './interface';
export const KeyInputContext = React.createContext<KeyInputProps>({});
export function useInputPattern() {
const x = useContext(KeyInputContext);
return x.pattern;
}

View file

@ -0,0 +1 @@
export type KeyInputProps = { pattern?: RegExp | string };

View file

@ -16,6 +16,7 @@ import {
withObjectSchema, withObjectSchema,
} from '../../types/json-schema'; } from '../../types/json-schema';
import type { ValidationTreeNode } from '../../types/validation'; import type { ValidationTreeNode } from '../../types/validation';
import { useInputPattern } from './context';
import TypeDropdown from './type-dropdown'; import TypeDropdown from './type-dropdown';
import TypeEditor from './type-editor'; import TypeEditor from './type-editor';
@ -54,6 +55,8 @@ export const SchemaPropertyEditor: React.FC<SchemaPropertyEditorProps> = ({
'object' as SchemaType, 'object' as SchemaType,
); );
const pattern = useInputPattern();
// Update temp values when props change // Update temp values when props change
useEffect(() => { useEffect(() => {
setTempName(name); setTempName(name);
@ -123,6 +126,7 @@ export const SchemaPropertyEditor: React.FC<SchemaPropertyEditorProps> = ({
className="h-8 text-sm font-medium min-w-[120px] max-w-full z-10" className="h-8 text-sm font-medium min-w-[120px] max-w-full z-10"
autoFocus autoFocus
onFocus={(e) => e.target.select()} onFocus={(e) => e.target.select()}
searchValue={pattern}
/> />
) : ( ) : (
<button <button

View file

@ -8,6 +8,8 @@ import {
import type { JSONSchema, NewField } from '../../types/json-schema'; import type { JSONSchema, NewField } from '../../types/json-schema';
import { asObjectSchema, isBooleanSchema } from '../../types/json-schema'; import { asObjectSchema, isBooleanSchema } from '../../types/json-schema';
import AddFieldButton from './add-field-button'; import AddFieldButton from './add-field-button';
import { KeyInputContext } from './context';
import { KeyInputProps } from './interface';
import SchemaFieldList from './schema-field-list'; import SchemaFieldList from './schema-field-list';
/** @public */ /** @public */
@ -17,9 +19,10 @@ export interface SchemaVisualEditorProps {
} }
/** @public */ /** @public */
const SchemaVisualEditor: FC<SchemaVisualEditorProps> = ({ const SchemaVisualEditor: FC<SchemaVisualEditorProps & KeyInputProps> = ({
schema, schema,
onChange, onChange,
pattern,
}) => { }) => {
const t = useTranslation(); const t = useTranslation();
// Handle adding a top-level field // Handle adding a top-level field
@ -121,7 +124,7 @@ const SchemaVisualEditor: FC<SchemaVisualEditorProps> = ({
return ( return (
<div className="p-4 h-full flex flex-col overflow-auto jsonjoy"> <div className="p-4 h-full flex flex-col overflow-auto jsonjoy">
<div className="mb-6 shrink-0"> <div className="mb-6 shrink-0">
<AddFieldButton onAddField={handleAddField} /> <AddFieldButton onAddField={handleAddField} pattern={pattern} />
</div> </div>
<div className="grow overflow-auto"> <div className="grow overflow-auto">
@ -131,12 +134,14 @@ const SchemaVisualEditor: FC<SchemaVisualEditorProps> = ({
<p className="text-sm">{t.visualEditorNoFieldsHint2}</p> <p className="text-sm">{t.visualEditorNoFieldsHint2}</p>
</div> </div>
) : ( ) : (
<SchemaFieldList <KeyInputContext.Provider value={{ pattern }}>
schema={schema} <SchemaFieldList
onAddField={handleAddField} schema={schema}
onEditField={handleEditField} onAddField={handleAddField}
onDeleteField={handleDeleteField} onEditField={handleEditField}
/> onDeleteField={handleDeleteField}
/>
</KeyInputContext.Provider>
)} )}
</div> </div>
</div> </div>

View file

@ -5,6 +5,7 @@ import { z } from 'zod';
import { SelectWithSearch } from '../originui/select-with-search'; import { SelectWithSearch } from '../originui/select-with-search';
import { RAGFlowFormItem } from '../ragflow-form'; import { RAGFlowFormItem } from '../ragflow-form';
import { MetadataFilterConditions } from './metadata-filter-conditions'; import { MetadataFilterConditions } from './metadata-filter-conditions';
import { MetadataSemiAutoFields } from './metadata-semi-auto-fields';
type MetadataFilterProps = { type MetadataFilterProps = {
prefix?: string; prefix?: string;
@ -25,6 +26,7 @@ export const MetadataFilterSchema = {
}), }),
) )
.optional(), .optional(),
semi_auto: z.array(z.string()).optional(),
}) })
.optional(), .optional(),
}; };
@ -76,6 +78,12 @@ export function MetadataFilter({
canReference={canReference} canReference={canReference}
></MetadataFilterConditions> ></MetadataFilterConditions>
)} )}
{hasKnowledge && metadata === DatasetMetadata.SemiAutomatic && (
<MetadataSemiAutoFields
kbIds={kbIds}
prefix={prefix}
></MetadataSemiAutoFields>
)}
</> </>
); );
} }

View file

@ -0,0 +1,99 @@
import { Button } from '@/components/ui/button';
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu';
import {
FormControl,
FormField,
FormItem,
FormLabel,
FormMessage,
} from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { useFetchKnowledgeMetadata } from '@/hooks/use-knowledge-request';
import { Plus, X } from 'lucide-react';
import { useCallback } from 'react';
import { useFieldArray, useFormContext } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
export function MetadataSemiAutoFields({
kbIds,
prefix = '',
}: {
kbIds: string[];
prefix?: string;
}) {
const { t } = useTranslation();
const form = useFormContext();
const name = prefix + 'meta_data_filter.semi_auto';
const metadata = useFetchKnowledgeMetadata(kbIds);
const { fields, remove, append } = useFieldArray({
name,
control: form.control,
});
const add = useCallback(
(key: string) => () => {
append(key);
},
[append],
);
return (
<section className="flex flex-col gap-2">
<div className="flex items-center justify-between">
<FormLabel>{t('chat.metadataKeys')}</FormLabel>
<DropdownMenu>
<DropdownMenuTrigger>
<Button variant={'ghost'} type="button">
<Plus />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent className="max-h-[300px] !overflow-y-auto scrollbar-auto">
{Object.keys(metadata.data).map((key, idx) => {
return (
<DropdownMenuItem key={idx} onClick={add(key)}>
{key}
</DropdownMenuItem>
);
})}
</DropdownMenuContent>
</DropdownMenu>
</div>
<div className="space-y-5">
{fields.map((field, index) => {
const typeField = `${name}.${index}`;
return (
<section key={field.id} className="flex gap-2">
<div className="w-full space-y-2">
<FormField
control={form.control}
name={typeField}
render={({ field }) => (
<FormItem className="flex-1 overflow-hidden">
<FormControl>
<Input
{...field}
placeholder={t('common.pleaseInput')}
readOnly
></Input>
</FormControl>
<FormMessage />
</FormItem>
)}
/>
</div>
<Button variant={'ghost'} onClick={() => remove(index)}>
<X className="text-text-sub-title-invert " />
</Button>
</section>
);
})}
</div>
</section>
);
}

View file

@ -193,3 +193,19 @@ export enum SwitchLogicOperator {
And = 'and', And = 'and',
Or = 'or', Or = 'or',
} }
export const WebhookAlgorithmList = [
'hs256',
'hs384',
'hs512',
'rs256',
'rs384',
'rs512',
'es256',
'es384',
'es512',
'ps256',
'ps384',
'ps512',
'none',
] as const;

View file

@ -36,5 +36,6 @@ export const EmptyConversationId = 'empty';
export enum DatasetMetadata { export enum DatasetMetadata {
Disabled = 'disabled', Disabled = 'disabled',
Automatic = 'auto', Automatic = 'auto',
SemiAutomatic = 'semi_auto',
Manual = 'manual', Manual = 'manual',
} }

View file

@ -7,6 +7,16 @@ export interface ITestRetrievalRequestBody {
use_kg?: boolean; use_kg?: boolean;
highlight?: boolean; highlight?: boolean;
kb_id?: string[]; kb_id?: string[];
meta_data_filter?: {
logic?: string;
method?: string;
manual?: Array<{
key: string;
op: string;
value: string;
}>;
semi_auto?: string[];
};
} }
export interface IFetchKnowledgeListRequestBody { export interface IFetchKnowledgeListRequestBody {

View file

@ -737,11 +737,13 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
metadataTip: metadataTip:
'Metadata filtering is the process of using metadata attributes (such as tags, categories, or access permissions) to refine and control the retrieval of relevant information within a system.', 'Metadata filtering is the process of using metadata attributes (such as tags, categories, or access permissions) to refine and control the retrieval of relevant information within a system.',
conditions: 'Conditions', conditions: 'Conditions',
metadataKeys: 'Filterable items',
addCondition: 'Add condition', addCondition: 'Add condition',
meta: { meta: {
disabled: 'Disabled', disabled: 'Disabled',
auto: 'Automatic', auto: 'Automatic',
manual: 'Manual', manual: 'Manual',
semi_auto: 'Semi-automatic',
}, },
cancel: 'Cancel', cancel: 'Cancel',
chatSetting: 'Chat setting', chatSetting: 'Chat setting',
@ -1976,6 +1978,37 @@ Important structured information may include: names, dates, locations, events, k
removeFirst: 'Remove first', removeFirst: 'Remove first',
removeLast: 'Remove last', removeLast: 'Remove last',
}, },
webhook: {
name: 'Webhook',
methods: 'Methods',
contentTypes: 'Content types',
security: 'Security',
schema: 'Schema',
response: 'Response',
executionMode: 'Execution mode',
authMethods: 'Authentication Methods',
authType: 'Authentication Type',
limit: 'Request Limit',
per: 'Time Period',
maxBodySize: 'Maximum Body Size',
ipWhitelist: 'IP Whitelist',
tokenHeader: 'Token Header',
tokenValue: 'Token Value',
username: 'Username',
password: 'Password',
algorithm: 'Algorithm',
secret: 'Secret',
issuer: 'Issuer',
audience: 'Audience',
requiredClaims: 'Required Claims',
header: 'Header',
status: 'Status',
headersTemplate: 'Headers Template',
bodyTemplate: 'Body Template',
basic: 'Basic',
bearer: 'Bearer',
apiKey: 'Api Key',
},
}, },
llmTools: { llmTools: {
bad_calculator: { bad_calculator: {

View file

@ -673,11 +673,13 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
metadataTip: metadataTip:
'元数据过滤是使用元数据属性(例如标签、类别或访问权限)来优化和控制系统内相关信息检索的过程。', '元数据过滤是使用元数据属性(例如标签、类别或访问权限)来优化和控制系统内相关信息检索的过程。',
conditions: '条件', conditions: '条件',
metadataKeys: '可选过滤项',
addCondition: '增加条件', addCondition: '增加条件',
meta: { meta: {
disabled: '禁用', disabled: '禁用',
auto: '自动', auto: '自动',
manual: '手动', manual: '手动',
semi_auto: '半自动',
}, },
cancel: '取消', cancel: '取消',
chatSetting: '聊天设置', chatSetting: '聊天设置',
@ -1770,6 +1772,37 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
removeFirst: '移除第一个', removeFirst: '移除第一个',
removeLast: '移除最后一个', removeLast: '移除最后一个',
}, },
webhook: {
name: '网络钩子',
methods: '方法',
contentTypes: '内容类型',
security: '安全配置',
schema: '模式',
response: '响应',
executionMode: '执行模式',
authMethods: '认证方法',
authType: '认证类型',
limit: '请求限制',
per: '时间周期',
maxBodySize: '最大主体大小',
ipWhitelist: 'IP白名单',
tokenHeader: '令牌头部',
tokenValue: '令牌值',
username: '用户名',
password: '密码',
algorithm: '算法',
secret: '密钥',
issuer: '签发者',
audience: '受众',
requiredClaims: '必需声明',
header: '头部',
status: '状态',
headersTemplate: '头部模板',
bodyTemplate: '主体模板',
basic: '基础认证',
bearer: '承载令牌',
apiKey: 'API密钥',
},
}, },
footer: { footer: {
profile: 'All rights reserved @ React', profile: 'All rights reserved @ React',

View file

@ -1,32 +1,14 @@
import i18n from '@/locales/config';
import { BeginId } from '@/pages/agent/constant';
import { ReactNode } from 'react'; import { ReactNode } from 'react';
const prefix = BeginId + '@';
interface VariableDisplayProps { interface VariableDisplayProps {
content: string; content: string;
getLabel?: (value?: string) => string | ReactNode; getLabel?: (value?: string) => string | ReactNode;
} }
// This component mimics the VariableNode's decorate function from PromptEditor // This component mimics the VariableNode's decorate function from PromptEditor
function VariableNodeDisplay({ function VariableNodeDisplay({ label }: { label: ReactNode }) {
value,
label,
}: {
value: string;
label: ReactNode;
}) {
let content: ReactNode = <span className="text-accent-primary">{label}</span>; let content: ReactNode = <span className="text-accent-primary">{label}</span>;
if (value.startsWith(prefix)) {
content = (
<div>
<span>{i18n.t(`flow.begin`)}</span> / {content}
</div>
);
}
return <div className="inline-flex items-center mr-1">{content}</div>; return <div className="inline-flex items-center mr-1">{content}</div>;
} }
@ -63,11 +45,7 @@ export function VariableDisplay({ content, getLabel }: VariableDisplayProps) {
if (label && label !== variableValue) { if (label && label !== variableValue) {
// If we found a valid label, render as variable node // If we found a valid label, render as variable node
elements.push( elements.push(
<VariableNodeDisplay <VariableNodeDisplay key={`variable-${index}`} label={label} />,
key={`variable-${index}`}
value={variableValue}
label={label}
/>,
); );
} else { } else {
// If no label found, keep as original text // If no label found, keep as original text

View file

@ -25,6 +25,7 @@ export * from './pipeline';
export enum AgentDialogueMode { export enum AgentDialogueMode {
Conversational = 'conversational', Conversational = 'conversational',
Task = 'task', Task = 'task',
Webhook = 'Webhook',
} }
import { ModelVariableType } from '@/constants/knowledge'; import { ModelVariableType } from '@/constants/knowledge';
@ -930,3 +931,37 @@ export enum AgentVariableType {
Begin = 'begin', Begin = 'begin',
Conversation = 'conversation', Conversation = 'conversation',
} }
export enum WebhookMethod {
Post = 'POST',
Get = 'GET',
Put = 'PUT',
Patch = 'PATCH',
Delete = 'DELETE',
Head = 'HEAD',
}
export enum WebhookContentType {
ApplicationJson = 'application/json',
MultipartFormData = 'multipart/form-data',
ApplicationXWwwFormUrlencoded = 'application/x-www-form-urlencoded',
TextPlain = 'text/plain',
ApplicationOctetStream = 'application/octet-stream',
}
export enum WebhookExecutionMode {
Immediately = 'Immediately',
Streaming = 'Streaming',
}
export enum WebhookSecurityAuthType {
None = 'none',
Token = 'token',
Basic = 'basic',
Jwt = 'jwt',
Hmac = 'hmac',
}
export const RateLimitPerList = ['minute', 'hour', 'day'];
export const WebhookMaxBodySize = ['10MB', '50MB', '100MB', '1000MB'];

View file

@ -42,9 +42,9 @@ import { FormWrapper } from '../components/form-wrapper';
import { Output } from '../components/output'; import { Output } from '../components/output';
import { PromptEditor } from '../components/prompt-editor'; import { PromptEditor } from '../components/prompt-editor';
import { QueryVariable } from '../components/query-variable'; import { QueryVariable } from '../components/query-variable';
import { SchemaDialog } from '../components/schema-dialog';
import { SchemaPanel } from '../components/schema-panel';
import { AgentTools, Agents } from './agent-tools'; import { AgentTools, Agents } from './agent-tools';
import { StructuredOutputDialog } from './structured-output-dialog';
import { StructuredOutputPanel } from './structured-output-panel';
import { useBuildPromptExtraPromptOptions } from './use-build-prompt-options'; import { useBuildPromptExtraPromptOptions } from './use-build-prompt-options';
import { import {
useHandleShowStructuredOutput, useHandleShowStructuredOutput,
@ -327,19 +327,17 @@ function AgentForm({ node }: INextOperatorForm) {
</Button> </Button>
</div> </div>
<StructuredOutputPanel <SchemaPanel value={structuredOutput}></SchemaPanel>
value={structuredOutput}
></StructuredOutputPanel>
</section> </section>
)} )}
</FormWrapper> </FormWrapper>
</Form> </Form>
{structuredOutputDialogVisible && ( {structuredOutputDialogVisible && (
<StructuredOutputDialog <SchemaDialog
hideModal={hideStructuredOutputDialog} hideModal={hideStructuredOutputDialog}
onOk={handleStructuredOutputDialogOk} onOk={handleStructuredOutputDialogOk}
initialValues={structuredOutput} initialValues={structuredOutput}
></StructuredOutputDialog> ></SchemaDialog>
)} )}
</> </>
); );

View file

@ -12,6 +12,7 @@ import { RAGFlowSelect } from '@/components/ui/select';
import { Switch } from '@/components/ui/switch'; import { Switch } from '@/components/ui/switch';
import { Textarea } from '@/components/ui/textarea'; import { Textarea } from '@/components/ui/textarea';
import { FormTooltip } from '@/components/ui/tooltip'; import { FormTooltip } from '@/components/ui/tooltip';
import { WebhookAlgorithmList } from '@/constants/agent';
import { zodResolver } from '@hookform/resolvers/zod'; import { zodResolver } from '@hookform/resolvers/zod';
import { t } from 'i18next'; import { t } from 'i18next';
import { Plus } from 'lucide-react'; import { Plus } from 'lucide-react';
@ -24,37 +25,71 @@ import { INextOperatorForm } from '../../interface';
import { ParameterDialog } from './parameter-dialog'; import { ParameterDialog } from './parameter-dialog';
import { QueryTable } from './query-table'; import { QueryTable } from './query-table';
import { useEditQueryRecord } from './use-edit-query'; import { useEditQueryRecord } from './use-edit-query';
import { useHandleModeChange } from './use-handle-mode-change';
import { useValues } from './use-values'; import { useValues } from './use-values';
import { useWatchFormChange } from './use-watch-change'; import { useWatchFormChange } from './use-watch-change';
import { WebHook } from './webhook';
const ModeOptions = [ const ModeOptions = [
{ value: AgentDialogueMode.Conversational, label: t('flow.conversational') }, { value: AgentDialogueMode.Conversational, label: t('flow.conversational') },
{ value: AgentDialogueMode.Task, label: t('flow.task') }, { value: AgentDialogueMode.Task, label: t('flow.task') },
{ value: AgentDialogueMode.Webhook, label: t('flow.webhook.name') },
]; ];
// Validation schema for the Begin-node form. Covers the conversational/task
// fields (prologue, inputs) plus the webhook-mode configuration
// (methods, content type, security, payload schema, response template).
const FormSchema = z.object({
  enablePrologue: z.boolean().optional(),
  prologue: z.string().trim().optional(),
  mode: z.string(),
  // User-defined input parameters collected before the agent starts
  // (used by the non-webhook modes).
  inputs: z
    .array(
      z.object({
        key: z.string(),
        type: z.string(),
        value: z.string(),
        optional: z.boolean(),
        name: z.string(),
        options: z.array(z.union([z.number(), z.string(), z.boolean()])),
      }),
    )
    .optional(),
  methods: z.string().optional(),
  content_types: z.string().optional(),
  // Webhook security settings: auth scheme, IP whitelist, rate limit,
  // body-size cap, and optional JWT parameters.
  security: z
    .object({
      auth_type: z.string(),
      ip_whitelist: z.array(z.object({ value: z.string() })),
      rate_limit: z.object({
        limit: z.number(),
        per: z.string().optional(),
      }),
      max_body_size: z.string(),
      jwt: z
        .object({
          // First entry of WebhookAlgorithmList is the default algorithm.
          algorithm: z.string().default(WebhookAlgorithmList[0]).optional(),
        })
        .optional(),
    })
    .optional(),
  // JSON schema describing the expected webhook payload (query/headers/body).
  schema: z.record(z.any()).optional(),
  // Template for the immediate HTTP response returned to the webhook caller.
  response: z
    .object({
      status: z.number(),
      headers_template: z.array(
        z.object({ key: z.string(), value: z.string() }),
      ),
      body_template: z.array(z.object({ key: z.string(), value: z.string() })),
    })
    .optional(),
  execution_mode: z.string().optional(),
});

export type BeginFormSchemaType = z.infer<typeof FormSchema>;
function BeginForm({ node }: INextOperatorForm) { function BeginForm({ node }: INextOperatorForm) {
const { t } = useTranslation(); const { t } = useTranslation();
const values = useValues(node); const values = useValues(node);
const FormSchema = z.object({
enablePrologue: z.boolean().optional(),
prologue: z.string().trim().optional(),
mode: z.string(),
inputs: z
.array(
z.object({
key: z.string(),
type: z.string(),
value: z.string(),
optional: z.boolean(),
name: z.string(),
options: z.array(z.union([z.number(), z.string(), z.boolean()])),
}),
)
.optional(),
});
const form = useForm({ const form = useForm({
defaultValues: values, defaultValues: values,
resolver: zodResolver(FormSchema), resolver: zodResolver(FormSchema),
@ -72,6 +107,8 @@ function BeginForm({ node }: INextOperatorForm) {
const previousModeRef = useRef(mode); const previousModeRef = useRef(mode);
const { handleModeChange } = useHandleModeChange(form);
useEffect(() => { useEffect(() => {
if ( if (
previousModeRef.current === AgentDialogueMode.Task && previousModeRef.current === AgentDialogueMode.Task &&
@ -111,6 +148,10 @@ function BeginForm({ node }: INextOperatorForm) {
placeholder={t('common.pleaseSelect')} placeholder={t('common.pleaseSelect')}
options={ModeOptions} options={ModeOptions}
{...field} {...field}
onChange={(val) => {
handleModeChange(val);
field.onChange(val);
}}
></RAGFlowSelect> ></RAGFlowSelect>
</FormControl> </FormControl>
<FormMessage /> <FormMessage />
@ -158,44 +199,49 @@ function BeginForm({ node }: INextOperatorForm) {
)} )}
/> />
)} )}
{/* Create a hidden field to make Form instance record this */} {mode === AgentDialogueMode.Webhook && <WebHook></WebHook>}
<FormField {mode !== AgentDialogueMode.Webhook && (
control={form.control} <>
name={'inputs'} {/* Create a hidden field to make Form instance record this */}
render={() => <div></div>} <FormField
/> control={form.control}
<Collapse name={'inputs'}
title={ render={() => <div></div>}
<div> />
{t('flow.input')} <Collapse
<FormTooltip tooltip={t('flow.beginInputTip')}></FormTooltip> title={
</div> <div>
} {t('flow.input')}
rightContent={ <FormTooltip tooltip={t('flow.beginInputTip')}></FormTooltip>
<Button </div>
variant={'ghost'} }
onClick={(e) => { rightContent={
e.preventDefault(); <Button
showModal(); variant={'ghost'}
}} onClick={(e) => {
e.preventDefault();
showModal();
}}
>
<Plus />
</Button>
}
> >
<Plus /> <QueryTable
</Button> data={inputs}
} showModal={showModal}
> deleteRecord={handleDeleteRecord}
<QueryTable ></QueryTable>
data={inputs} </Collapse>
showModal={showModal} {visible && (
deleteRecord={handleDeleteRecord} <ParameterDialog
></QueryTable> hideModal={hideModal}
</Collapse> initialValue={currentRecord}
{visible && ( otherThanCurrentQuery={otherThanCurrentQuery}
<ParameterDialog submit={ok}
hideModal={hideModal} ></ParameterDialog>
initialValue={currentRecord} )}
otherThanCurrentQuery={otherThanCurrentQuery} </>
submit={ok}
></ParameterDialog>
)} )}
</Form> </Form>
</section> </section>

View file

@ -0,0 +1,76 @@
import { useCallback } from 'react';
import { UseFormReturn } from 'react-hook-form';
import {
AgentDialogueMode,
RateLimitPerList,
WebhookExecutionMode,
WebhookMaxBodySize,
WebhookSecurityAuthType,
} from '../../constant';
// const WebhookSchema = {
// query: {
// type: 'object',
// required: [],
// properties: {
// // debug: { type: 'boolean' },
// // event: { type: 'string' },
// },
// },
// headers: {
// type: 'object',
// required: [],
// properties: {
// // 'X-Trace-ID': { type: 'string' },
// },
// },
// body: {
// type: 'object',
// required: [],
// properties: {
// id: { type: 'string' },
// payload: { type: 'object' },
// },
// },
// };
// Default JSON schema seeded into the form when webhook mode is selected:
// the three top-level sections of an incoming HTTP request.
const schema = {
  properties: {
    query: {
      type: 'object',
      description: '',
    },
    headers: {
      type: 'object',
      description: '',
    },
    body: {
      type: 'object',
      description: '',
    },
  },
};

// Form paths (react-hook-form dot notation) and the defaults applied once
// when the user switches the Begin node into webhook mode.
const initialFormValuesMap = {
  schema: schema,
  'security.auth_type': WebhookSecurityAuthType.Basic,
  'security.rate_limit.per': RateLimitPerList[0],
  'security.max_body_size': WebhookMaxBodySize[0],
  execution_mode: WebhookExecutionMode.Immediately,
};
/**
 * Returns a callback that seeds the webhook defaults into the form the
 * moment the Begin node is switched into webhook mode. Switching to any
 * other mode leaves the form untouched.
 */
export function useHandleModeChange(form: UseFormReturn<any>) {
  const handleModeChange = useCallback(
    (nextMode: AgentDialogueMode) => {
      if (nextMode !== AgentDialogueMode.Webhook) {
        return;
      }
      for (const [path, defaultValue] of Object.entries(
        initialFormValuesMap,
      )) {
        // shouldDirty so the canvas sync (useWatchFormChange) picks it up.
        form.setValue(path, defaultValue, { shouldDirty: true });
      }
    },
    [form],
  );

  return { handleModeChange };
}

View file

@ -0,0 +1,28 @@
import { JSONSchema } from '@/components/jsonjoy-builder';
import { useSetModalState } from '@/hooks/common-hooks';
import { useCallback } from 'react';
import { UseFormReturn } from 'react-hook-form';
/**
 * Modal state for the webhook schema dialog. On OK, the edited JSON schema
 * is written into the form's `schema` field and the dialog is closed.
 */
export function useShowSchemaDialog(form: UseFormReturn<any>) {
  const { visible, showModal, hideModal } = useSetModalState();

  const handleSchemaDialogOk = useCallback(
    (updatedSchema: JSONSchema) => {
      // Sync data to canvas
      form.setValue('schema', updatedSchema);
      hideModal();
    },
    [form, hideModal],
  );

  return {
    schemaDialogVisible: visible,
    showSchemaDialog: showModal,
    hideSchemaDialog: hideModal,
    handleSchemaDialogOk,
  };
}

View file

@ -1,6 +1,7 @@
import { omit } from 'lodash'; import { omit } from 'lodash';
import { useEffect } from 'react'; import { useEffect } from 'react';
import { UseFormReturn, useWatch } from 'react-hook-form'; import { UseFormReturn, useWatch } from 'react-hook-form';
import { AgentDialogueMode } from '../../constant';
import { BeginQuery } from '../../interface'; import { BeginQuery } from '../../interface';
import useGraphStore from '../../store'; import useGraphStore from '../../store';
@ -20,9 +21,21 @@ export function useWatchFormChange(id?: string, form?: UseFormReturn) {
if (id) { if (id) {
values = form?.getValues() || {}; values = form?.getValues() || {};
let outputs: Record<string, any> = {};
// For webhook mode, use schema properties as direct outputs
// Each property (body, headers, query) should be able to show secondary menu
if (
values.mode === AgentDialogueMode.Webhook &&
values.schema?.properties
) {
outputs = { ...values.schema.properties };
}
const nextValues = { const nextValues = {
...values, ...values,
inputs: transferInputsArrayToObject(values.inputs), inputs: transferInputsArrayToObject(values.inputs),
outputs,
}; };
updateNodeForm(id, nextValues); updateNodeForm(id, nextValues);

View file

@ -0,0 +1,139 @@
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Input } from '@/components/ui/input';
import { WebhookAlgorithmList } from '@/constants/agent';
import { WebhookSecurityAuthType } from '@/pages/agent/constant';
import { buildOptions } from '@/utils/form';
import { useCallback } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
// Option lists for the JWT/HMAC algorithm selector and the JWT
// required-claims selector.
const AlgorithmOptions = buildOptions(WebhookAlgorithmList);
const RequiredClaimsOptions = buildOptions(['exp', 'sub']);

/**
 * Renders the credential fields matching the currently selected webhook
 * auth scheme (`security.auth_type`): token header/value, basic
 * username/password, JWT settings, or HMAC settings. Renders nothing
 * for the 'none' scheme.
 */
export function Auth() {
  const { t } = useTranslation();
  const form = useFormContext();

  // Subscribe so the fields swap immediately when the scheme changes.
  const authType = useWatch({
    name: 'security.auth_type',
    control: form.control,
  });

  // Header name + expected value for static-token auth.
  const renderTokenAuth = useCallback(
    () => (
      <>
        <RAGFlowFormItem
          name="security.token.token_header"
          label={t('flow.webhook.tokenHeader')}
        >
          <Input></Input>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.token.token_value"
          label={t('flow.webhook.tokenValue')}
        >
          <Input></Input>
        </RAGFlowFormItem>
      </>
    ),
    [t],
  );

  // HTTP Basic credentials.
  const renderBasicAuth = useCallback(
    () => (
      <>
        <RAGFlowFormItem
          name="security.basic_auth.username"
          label={t('flow.webhook.username')}
        >
          <Input></Input>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.basic_auth.password"
          label={t('flow.webhook.password')}
        >
          <Input></Input>
        </RAGFlowFormItem>
      </>
    ),
    [t],
  );

  // JWT verification settings: algorithm, secret, issuer/audience checks,
  // and which claims must be present.
  const renderJwtAuth = useCallback(
    () => (
      <>
        <RAGFlowFormItem
          name="security.jwt.algorithm"
          label={t('flow.webhook.algorithm')}
        >
          <SelectWithSearch options={AlgorithmOptions}></SelectWithSearch>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.jwt.secret"
          label={t('flow.webhook.secret')}
        >
          <Input></Input>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.jwt.issuer"
          label={t('flow.webhook.issuer')}
        >
          <Input></Input>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.jwt.audience"
          label={t('flow.webhook.audience')}
        >
          <Input></Input>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.jwt.required_claims"
          label={t('flow.webhook.requiredClaims')}
        >
          <SelectWithSearch options={RequiredClaimsOptions}></SelectWithSearch>
        </RAGFlowFormItem>
      </>
    ),
    [t],
  );

  // HMAC signature verification: signature header, shared secret, algorithm.
  const renderHmacAuth = useCallback(
    () => (
      <>
        <RAGFlowFormItem
          name="security.hmac.header"
          label={t('flow.webhook.header')}
        >
          <Input></Input>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.hmac.secret"
          label={t('flow.webhook.secret')}
        >
          <Input></Input>
        </RAGFlowFormItem>
        <RAGFlowFormItem
          name="security.hmac.algorithm"
          label={t('flow.webhook.algorithm')}
        >
          <SelectWithSearch options={AlgorithmOptions}></SelectWithSearch>
        </RAGFlowFormItem>
      </>
    ),
    [t],
  );

  // Scheme -> renderer dispatch table (rebuilt each render; the renderers
  // themselves are memoized above).
  const AuthMap = {
    [WebhookSecurityAuthType.Token]: renderTokenAuth,
    [WebhookSecurityAuthType.Basic]: renderBasicAuth,
    [WebhookSecurityAuthType.Jwt]: renderJwtAuth,
    [WebhookSecurityAuthType.Hmac]: renderHmacAuth,
    [WebhookSecurityAuthType.None]: () => null,
  };

  // Fall back to 'none' (renders nothing) until the field is initialized.
  return AuthMap[
    (authType ?? WebhookSecurityAuthType.None) as WebhookSecurityAuthType
  ]();
}

View file

@ -0,0 +1,213 @@
import { BoolSegmented } from '@/components/bool-segmented';
import { KeyInput } from '@/components/key-input';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { useIsDarkTheme } from '@/components/theme-provider';
import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input';
import { Separator } from '@/components/ui/separator';
import { Textarea } from '@/components/ui/textarea';
import { Editor, loader } from '@monaco-editor/react';
import { X } from 'lucide-react';
import { ReactNode, useCallback } from 'react';
import { useFieldArray, useFormContext } from 'react-hook-form';
import { InputMode, TypesWithArray } from '../../../constant';
import {
InputModeOptions,
buildConversationVariableSelectOptions,
} from '../../../utils';
import { DynamicFormHeader } from '../../components/dynamic-fom-header';
import { QueryVariable } from '../../components/query-variable';
// Serve the Monaco editor assets from the locally hosted /vs path
// instead of the default CDN.
loader.config({ paths: { vs: '/vs' } });
// Props for the dynamic response-template list. `name` is the
// react-hook-form field-array path; the *Field props let callers rename
// the per-row key/value/type fields.
type SelectKeysProps = {
  name: string;
  label: ReactNode;
  tooltip?: string;
  keyField?: string;
  valueField?: string;
  operatorField?: string;
  nodeId?: string;
};

const VariableTypeOptions = buildConversationVariableSelectOptions();

// Per-row field recording whether the value is a constant or a variable
// reference.
const modeField = 'input_mode';

// Default constant seeded into a row's value when its data type changes.
// Array/object types get a JSON-text default for the Monaco editor.
const ConstantValueMap = {
  [TypesWithArray.Boolean]: true,
  [TypesWithArray.Number]: 0,
  [TypesWithArray.String]: '',
  [TypesWithArray.ArrayBoolean]: '[]',
  [TypesWithArray.ArrayNumber]: '[]',
  [TypesWithArray.ArrayString]: '[]',
  [TypesWithArray.ArrayObject]: '[]',
  [TypesWithArray.Object]: '{}',
};
/**
 * Editable key/value list for webhook response templates (headers or body).
 * Each row has a key, a data type, an input mode (constant vs variable
 * reference), and a value widget chosen from those three.
 */
export function DynamicResponse({
  name,
  label,
  tooltip,
  keyField = 'key',
  valueField = 'value',
  operatorField = 'type',
}: SelectKeysProps) {
  const form = useFormContext();
  const isDarkTheme = useIsDarkTheme();
  const { fields, remove, append } = useFieldArray({
    name: name,
    control: form.control,
  });

  // Reset a row's value to a sensible default for the (mode, type) pair:
  // empty string for variable mode, type-specific constant otherwise.
  const initializeValue = useCallback(
    (mode: string, variableType: string, valueFieldAlias: string) => {
      if (mode === InputMode.Variable) {
        form.setValue(valueFieldAlias, '', { shouldDirty: true });
      } else {
        const val = ConstantValueMap[variableType as TypesWithArray];
        form.setValue(valueFieldAlias, val, { shouldDirty: true });
      }
    },
    [form],
  );

  // Mode changed: re-initialize the value using the row's current type.
  const handleModeChange = useCallback(
    (mode: string, valueFieldAlias: string, operatorFieldAlias: string) => {
      const variableType = form.getValues(operatorFieldAlias);
      initializeValue(mode, variableType, valueFieldAlias);
    },
    [form, initializeValue],
  );

  // Type changed: re-initialize the value using the row's current mode.
  const handleVariableTypeChange = useCallback(
    (variableType: string, valueFieldAlias: string, modeFieldAlias: string) => {
      const mode = form.getValues(modeFieldAlias);
      initializeValue(mode, variableType, valueFieldAlias);
    },
    [form, initializeValue],
  );

  // Choose the value widget for a row. NOTE(review): this reads
  // form.getValues (no subscription); it appears to rely on the row
  // re-rendering after the selects above fire onChange — confirm stale
  // renders aren't possible.
  const renderParameter = useCallback(
    (operatorFieldName: string, modeFieldName: string) => {
      const mode = form.getValues(modeFieldName);
      const logicalOperator = form.getValues(operatorFieldName);
      if (mode === InputMode.Constant) {
        if (logicalOperator === TypesWithArray.Boolean) {
          return <BoolSegmented></BoolSegmented>;
        }
        if (logicalOperator === TypesWithArray.Number) {
          return <Input className="w-full" type="number"></Input>;
        }
        if (logicalOperator === TypesWithArray.String) {
          return <Textarea></Textarea>;
        }
        {/* Object/array constants are edited as JSON text. NOTE(review):
            no value/onChange is passed here — presumably RAGFlowFormItem
            injects the field binding into its child; verify the editor
            content actually syncs to the form. */}
        return (
          <Editor
            height={300}
            theme={isDarkTheme ? 'vs-dark' : 'vs'}
            language={'json'}
            options={{
              minimap: { enabled: false },
              automaticLayout: true,
            }}
          />
        );
      }
      // Variable mode: pick a canvas variable restricted to the row's type.
      return (
        <QueryVariable
          types={[logicalOperator]}
          hideLabel
          pureQuery
        ></QueryVariable>
      );
    },
    [form, isDarkTheme],
  );

  return (
    <section className="space-y-2">
      <DynamicFormHeader
        label={label}
        tooltip={tooltip}
        onClick={() =>
          append({
            [keyField]: '',
            [valueField]: '',
            [modeField]: InputMode.Constant,
            [operatorField]: TypesWithArray.String,
          })
        }
      ></DynamicFormHeader>
      <div className="space-y-5">
        {fields.map((field, index) => {
          // Dot-notation paths for this row's individual fields.
          const keyFieldAlias = `${name}.${index}.${keyField}`;
          const valueFieldAlias = `${name}.${index}.${valueField}`;
          const operatorFieldAlias = `${name}.${index}.${operatorField}`;
          const modeFieldAlias = `${name}.${index}.${modeField}`;
          return (
            <section key={field.id} className="flex gap-2">
              <div className="flex-1 space-y-3 min-w-0">
                <div className="flex items-center">
                  <RAGFlowFormItem name={keyFieldAlias} className="flex-1 ">
                    <KeyInput></KeyInput>
                  </RAGFlowFormItem>
                  <Separator className="w-2" />
                  <RAGFlowFormItem name={operatorFieldAlias} className="flex-1">
                    {(field) => (
                      <SelectWithSearch
                        value={field.value}
                        onChange={(val) => {
                          handleVariableTypeChange(
                            val,
                            valueFieldAlias,
                            modeFieldAlias,
                          );
                          field.onChange(val);
                        }}
                        options={VariableTypeOptions}
                      ></SelectWithSearch>
                    )}
                  </RAGFlowFormItem>
                  <Separator className="w-2" />
                  <RAGFlowFormItem name={modeFieldAlias} className="flex-1">
                    {(field) => (
                      <SelectWithSearch
                        value={field.value}
                        onChange={(val) => {
                          handleModeChange(
                            val,
                            valueFieldAlias,
                            operatorFieldAlias,
                          );
                          field.onChange(val);
                        }}
                        options={InputModeOptions}
                      ></SelectWithSearch>
                    )}
                  </RAGFlowFormItem>
                </div>
                <RAGFlowFormItem name={valueFieldAlias} className="w-full">
                  {renderParameter(operatorFieldAlias, modeFieldAlias)}
                </RAGFlowFormItem>
              </div>
              <Button variant={'ghost'} onClick={() => remove(index)}>
                <X />
              </Button>
            </section>
          );
        })}
      </div>
    </section>
  );
}

View file

@ -0,0 +1,134 @@
import { Collapse } from '@/components/collapse';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input';
import { Separator } from '@/components/ui/separator';
import { Textarea } from '@/components/ui/textarea';
import { buildOptions } from '@/utils/form';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import {
RateLimitPerList,
WebhookContentType,
WebhookExecutionMode,
WebhookMaxBodySize,
WebhookMethod,
WebhookSecurityAuthType,
} from '../../../constant';
import { DynamicStringForm } from '../../components/dynamic-string-form';
import { SchemaDialog } from '../../components/schema-dialog';
import { SchemaPanel } from '../../components/schema-panel';
import { useShowSchemaDialog } from '../use-show-schema-dialog';
import { Auth } from './auth';
import { WebhookResponse } from './response';
const RateLimitPerOptions = buildOptions(RateLimitPerList);

/**
 * Configuration form for the Begin node's webhook mode: HTTP method and
 * content type, security (auth scheme, rate limit, body-size cap, IP
 * whitelist), the payload JSON schema (edited via SchemaDialog), and —
 * for immediate execution — the response template.
 */
export function WebHook() {
  const { t } = useTranslation();
  const form = useFormContext();

  // Subscribe so the response section toggles when execution mode changes.
  const executionMode = useWatch({
    control: form.control,
    name: 'execution_mode',
  });

  const {
    showSchemaDialog,
    schemaDialogVisible,
    hideSchemaDialog,
    handleSchemaDialogOk,
  } = useShowSchemaDialog(form);

  // Read without subscribing; a fresh value is read on each re-render
  // (e.g. after the dialog closes).
  const schema = form.getValues('schema');

  return (
    <>
      <RAGFlowFormItem name="methods" label={t('flow.webhook.methods')}>
        <SelectWithSearch
          options={buildOptions(WebhookMethod)}
        ></SelectWithSearch>
      </RAGFlowFormItem>
      <RAGFlowFormItem
        name="content_types"
        label={t('flow.webhook.contentTypes')}
      >
        <SelectWithSearch
          options={buildOptions(WebhookContentType)}
        ></SelectWithSearch>
      </RAGFlowFormItem>
      <Collapse title={<div>Security</div>}>
        <section className="space-y-4">
          <RAGFlowFormItem
            name="security.auth_type"
            label={t('flow.webhook.authType')}
          >
            <SelectWithSearch
              options={buildOptions(WebhookSecurityAuthType)}
            ></SelectWithSearch>
          </RAGFlowFormItem>
          {/* Credential fields matching the selected auth scheme. */}
          <Auth></Auth>
          <RAGFlowFormItem
            name="security.rate_limit.limit"
            label={t('flow.webhook.limit')}
          >
            <Input type="number"></Input>
          </RAGFlowFormItem>
          <RAGFlowFormItem
            name="security.rate_limit.per"
            label={t('flow.webhook.per')}
          >
            <SelectWithSearch options={RateLimitPerOptions}></SelectWithSearch>
          </RAGFlowFormItem>
          <RAGFlowFormItem
            name="security.max_body_size"
            label={t('flow.webhook.maxBodySize')}
          >
            <SelectWithSearch
              options={buildOptions(WebhookMaxBodySize)}
            ></SelectWithSearch>
          </RAGFlowFormItem>
          <DynamicStringForm
            name="security.ip_whitelist"
            label={t('flow.webhook.ipWhitelist')}
          ></DynamicStringForm>
        </section>
      </Collapse>
      {/* Hidden field so the form instance tracks `schema`; the visible
          editing surface is the SchemaDialog below. */}
      <RAGFlowFormItem
        name="schema"
        label={t('flow.webhook.schema')}
        className="hidden"
      >
        <Textarea></Textarea>
      </RAGFlowFormItem>
      <RAGFlowFormItem
        name="execution_mode"
        label={t('flow.webhook.executionMode')}
      >
        <SelectWithSearch
          options={buildOptions(WebhookExecutionMode)}
        ></SelectWithSearch>
      </RAGFlowFormItem>
      {/* Response template only applies when the webhook replies immediately. */}
      {executionMode === WebhookExecutionMode.Immediately && (
        <WebhookResponse></WebhookResponse>
      )}
      <Separator></Separator>
      <section className="flex justify-between items-center">
        Schema
        <Button variant={'ghost'} onClick={showSchemaDialog}>
          {t('flow.structuredOutput.configuration')}
        </Button>
      </section>
      <SchemaPanel value={schema}></SchemaPanel>
      {schemaDialogVisible && (
        <SchemaDialog
          initialValues={schema}
          hideModal={hideSchemaDialog}
          onOk={handleSchemaDialogOk}
          pattern={''}
        ></SchemaDialog>
      )}
    </>
  );
}

View file

@ -0,0 +1,30 @@
import { Collapse } from '@/components/collapse';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Input } from '@/components/ui/input';
import { useTranslation } from 'react-i18next';
import { DynamicResponse } from './dynamic-response';
export function WebhookResponse() {
const { t } = useTranslation();
return (
<Collapse title={<div>Response</div>}>
<section className="space-y-4">
<RAGFlowFormItem
name={'response.status'}
label={t('flow.webhook.status')}
>
<Input type="number"></Input>
</RAGFlowFormItem>
<DynamicResponse
name="response.headers_template"
label={t('flow.webhook.headersTemplate')}
></DynamicResponse>
<DynamicResponse
name="response.body_template"
label={t('flow.webhook.bodyTemplate')}
></DynamicResponse>
</section>
</Collapse>
);
}

View file

@ -0,0 +1,46 @@
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input';
import { Trash2 } from 'lucide-react';
import { useFieldArray, useFormContext } from 'react-hook-form';
import { DynamicFormHeader, FormListHeaderProps } from './dynamic-fom-header';
// `name` is the react-hook-form field-array path holding `{ value: string }` rows.
type DynamicStringFormProps = { name: string } & FormListHeaderProps;

/**
 * Editable list of plain string values (e.g. the webhook IP whitelist):
 * a header with an add action plus one input + delete button per row.
 */
export function DynamicStringForm({ name, label }: DynamicStringFormProps) {
  const form = useFormContext();
  const { fields, append, remove } = useFieldArray({
    name: name,
    control: form.control,
  });

  return (
    <section>
      <DynamicFormHeader
        label={label}
        onClick={() => append({ value: '' })}
      ></DynamicFormHeader>
      <div className="space-y-4">
        {fields.map((field, index) => (
          <div key={field.id} className="flex items-center gap-2">
            {/* NOTE(review): label "delimiter" looks like a copy-paste
                leftover; it is visually hidden via labelClassName, but
                confirm it isn't surfaced for accessibility. */}
            <RAGFlowFormItem
              name={`${name}.${index}.value`}
              label="delimiter"
              labelClassName="!hidden"
              className="flex-1 !m-0"
            >
              <Input className="!m-0"></Input>
            </RAGFlowFormItem>
            <Button
              type="button"
              variant={'ghost'}
              onClick={() => remove(index)}
            >
              <Trash2 />
            </Button>
          </div>
        ))}
      </div>
    </section>
  );
}

View file

@ -3,6 +3,7 @@ import {
JsonSchemaVisualizer, JsonSchemaVisualizer,
SchemaVisualEditor, SchemaVisualEditor,
} from '@/components/jsonjoy-builder'; } from '@/components/jsonjoy-builder';
import { KeyInputProps } from '@/components/jsonjoy-builder/components/schema-editor/interface';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
import { import {
Dialog, Dialog,
@ -16,11 +17,12 @@ import { IModalProps } from '@/interfaces/common';
import { useCallback, useState } from 'react'; import { useCallback, useState } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
export function StructuredOutputDialog({ export function SchemaDialog({
hideModal, hideModal,
onOk, onOk,
initialValues, initialValues,
}: IModalProps<any>) { pattern,
}: IModalProps<any> & KeyInputProps) {
const { t } = useTranslation(); const { t } = useTranslation();
const [schema, setSchema] = useState<JSONSchema>(initialValues); const [schema, setSchema] = useState<JSONSchema>(initialValues);
@ -36,7 +38,11 @@ export function StructuredOutputDialog({
</DialogHeader> </DialogHeader>
<section className="flex overflow-auto"> <section className="flex overflow-auto">
<div className="flex-1"> <div className="flex-1">
<SchemaVisualEditor schema={schema} onChange={setSchema} /> <SchemaVisualEditor
schema={schema}
onChange={setSchema}
pattern={pattern}
/>
</div> </div>
<div className="flex-1"> <div className="flex-1">
<JsonSchemaVisualizer schema={schema} onChange={setSchema} /> <JsonSchemaVisualizer schema={schema} onChange={setSchema} />

View file

@ -1,6 +1,6 @@
import { JSONSchema, JsonSchemaVisualizer } from '@/components/jsonjoy-builder'; import { JSONSchema, JsonSchemaVisualizer } from '@/components/jsonjoy-builder';
export function StructuredOutputPanel({ value }: { value: JSONSchema }) { export function SchemaPanel({ value }: { value: JSONSchema }) {
return ( return (
<section className="h-48"> <section className="h-48">
<JsonSchemaVisualizer <JsonSchemaVisualizer

View file

@ -2,7 +2,9 @@ import { getStructuredDatatype } from '@/utils/canvas-util';
import { get, isPlainObject } from 'lodash'; import { get, isPlainObject } from 'lodash';
import { ReactNode, useCallback } from 'react'; import { ReactNode, useCallback } from 'react';
import { import {
AgentDialogueMode,
AgentStructuredOutputField, AgentStructuredOutputField,
BeginId,
JsonSchemaDataType, JsonSchemaDataType,
Operator, Operator,
} from '../constant'; } from '../constant';
@ -16,36 +18,94 @@ function getNodeId(value: string) {
} }
export function useShowSecondaryMenu() { export function useShowSecondaryMenu() {
const { getOperatorTypeFromId } = useGraphStore((state) => state); const { getOperatorTypeFromId, getNode } = useGraphStore((state) => state);
const showSecondaryMenu = useCallback( const showSecondaryMenu = useCallback(
(value: string, outputLabel: string) => { (value: string, outputLabel: string) => {
const nodeId = getNodeId(value); const nodeId = getNodeId(value);
return ( const operatorType = getOperatorTypeFromId(nodeId);
getOperatorTypeFromId(nodeId) === Operator.Agent &&
// For Agent nodes, show secondary menu for 'structured' field
if (
operatorType === Operator.Agent &&
outputLabel === AgentStructuredOutputField outputLabel === AgentStructuredOutputField
); ) {
return true;
}
// For Begin nodes in webhook mode, show secondary menu for schema properties (body, headers, query, etc.)
if (operatorType === Operator.Begin) {
const node = getNode(nodeId);
const mode = get(node, 'data.form.mode');
if (mode === AgentDialogueMode.Webhook) {
// Check if this output field is from the schema
const outputs = get(node, 'data.form.outputs', {});
const outputField = outputs[outputLabel];
// Show secondary menu if the field is an object or has properties
return (
outputField &&
(outputField.type === 'object' ||
(outputField.properties &&
Object.keys(outputField.properties).length > 0))
);
}
}
return false;
}, },
[getOperatorTypeFromId], [getOperatorTypeFromId, getNode],
); );
return showSecondaryMenu; return showSecondaryMenu;
} }
/**
 * Accessors for the Begin node's `outputs` and `schema` form fields,
 * each defaulting to an empty object when absent.
 */
function useGetBeginOutputsOrSchema() {
  const { getNode } = useGraphStore((state) => state);

  const getBeginOutputs = useCallback(
    () => get(getNode(BeginId), 'data.form.outputs', {}),
    [getNode],
  );

  const getBeginSchema = useCallback(
    () => get(getNode(BeginId), 'data.form.schema', {}),
    [getNode],
  );

  return { getBeginOutputs, getBeginSchema };
}
export function useGetStructuredOutputByValue() { export function useGetStructuredOutputByValue() {
const { getNode } = useGraphStore((state) => state); const { getNode, getOperatorTypeFromId } = useGraphStore((state) => state);
const { getBeginOutputs } = useGetBeginOutputsOrSchema();
const getStructuredOutput = useCallback( const getStructuredOutput = useCallback(
(value: string) => { (value: string) => {
const node = getNode(getNodeId(value)); const nodeId = getNodeId(value);
const structuredOutput = get( const node = getNode(nodeId);
node, const operatorType = getOperatorTypeFromId(nodeId);
`data.form.outputs.${AgentStructuredOutputField}`, const fields = splitValue(value);
); const outputLabel = fields.at(1);
let structuredOutput;
if (operatorType === Operator.Agent) {
structuredOutput = get(
node,
`data.form.outputs.${AgentStructuredOutputField}`,
);
} else if (operatorType === Operator.Begin) {
// For Begin nodes in webhook mode, get the specific schema property
const outputs = getBeginOutputs();
if (outputLabel) {
structuredOutput = outputs[outputLabel];
}
}
return structuredOutput; return structuredOutput;
}, },
[getNode], [getBeginOutputs, getNode, getOperatorTypeFromId],
); );
return getStructuredOutput; return getStructuredOutput;
@ -66,13 +126,14 @@ export function useFindAgentStructuredOutputLabel() {
icon?: ReactNode; icon?: ReactNode;
}>, }>,
) => { ) => {
// agent structured output
const fields = splitValue(value); const fields = splitValue(value);
const operatorType = getOperatorTypeFromId(fields.at(0));
// Handle Agent structured fields
if ( if (
getOperatorTypeFromId(fields.at(0)) === Operator.Agent && operatorType === Operator.Agent &&
fields.at(1)?.startsWith(AgentStructuredOutputField) fields.at(1)?.startsWith(AgentStructuredOutputField)
) { ) {
// is agent structured output
const agentOption = options.find((x) => value.includes(x.value)); const agentOption = options.find((x) => value.includes(x.value));
const jsonSchemaFields = fields const jsonSchemaFields = fields
.at(1) .at(1)
@ -84,6 +145,19 @@ export function useFindAgentStructuredOutputLabel() {
value: value, value: value,
}; };
} }
// Handle Begin webhook fields
if (operatorType === Operator.Begin && fields.at(1)) {
const fieldOption = options
.filter((x) => x.parentLabel === BeginId)
.find((x) => value.startsWith(x.value));
return {
...fieldOption,
label: fields.at(1),
value: value,
};
}
}, },
[getOperatorTypeFromId], [getOperatorTypeFromId],
); );
@ -94,6 +168,7 @@ export function useFindAgentStructuredOutputLabel() {
export function useFindAgentStructuredOutputTypeByValue() { export function useFindAgentStructuredOutputTypeByValue() {
const { getOperatorTypeFromId } = useGraphStore((state) => state); const { getOperatorTypeFromId } = useGraphStore((state) => state);
const filterStructuredOutput = useGetStructuredOutputByValue(); const filterStructuredOutput = useGetStructuredOutputByValue();
const { getBeginSchema } = useGetBeginOutputsOrSchema();
const findTypeByValue = useCallback( const findTypeByValue = useCallback(
( (
@ -136,10 +211,12 @@ export function useFindAgentStructuredOutputTypeByValue() {
} }
const fields = splitValue(value); const fields = splitValue(value);
const nodeId = fields.at(0); const nodeId = fields.at(0);
const operatorType = getOperatorTypeFromId(nodeId);
const jsonSchema = filterStructuredOutput(value); const jsonSchema = filterStructuredOutput(value);
// Handle Agent structured fields
if ( if (
getOperatorTypeFromId(nodeId) === Operator.Agent && operatorType === Operator.Agent &&
fields.at(1)?.startsWith(AgentStructuredOutputField) fields.at(1)?.startsWith(AgentStructuredOutputField)
) { ) {
const jsonSchemaFields = fields const jsonSchemaFields = fields
@ -151,13 +228,32 @@ export function useFindAgentStructuredOutputTypeByValue() {
return type; return type;
} }
} }
// Handle Begin webhook fields (body, headers, query, etc.)
if (operatorType === Operator.Begin) {
const outputLabel = fields.at(1);
const schema = getBeginSchema();
if (outputLabel && schema) {
const jsonSchemaFields = fields.at(1);
if (jsonSchemaFields) {
const type = findTypeByValue(schema, jsonSchemaFields);
return type;
}
}
}
}, },
[filterStructuredOutput, findTypeByValue, getOperatorTypeFromId], [
filterStructuredOutput,
findTypeByValue,
getBeginSchema,
getOperatorTypeFromId,
],
); );
return findAgentStructuredOutputTypeByValue; return findAgentStructuredOutputTypeByValue;
} }
// TODO: Consider merging with useFindAgentStructuredOutputLabel
export function useFindAgentStructuredOutputLabelByValue() { export function useFindAgentStructuredOutputLabelByValue() {
const { getNode } = useGraphStore((state) => state); const { getNode } = useGraphStore((state) => state);

View file

@ -314,10 +314,12 @@ export function useFilterQueryVariableOptionsByTypes({
? toLower(y.type).includes(toLower(x)) ? toLower(y.type).includes(toLower(x))
: toLower(y.type) === toLower(x), : toLower(y.type) === toLower(x),
) || ) ||
// agent structured output
isAgentStructured( isAgentStructured(
y.value, y.value,
y.value.slice(-AgentStructuredOutputField.length), y.value.slice(-AgentStructuredOutputField.length),
), // agent structured output ) ||
y.value.startsWith(BeginId), // begin node outputs
), ),
}; };
}) })

View file

@ -24,6 +24,7 @@ import {
import pipe from 'lodash/fp/pipe'; import pipe from 'lodash/fp/pipe';
import isObject from 'lodash/isObject'; import isObject from 'lodash/isObject';
import { import {
AgentDialogueMode,
CategorizeAnchorPointPositions, CategorizeAnchorPointPositions,
FileType, FileType,
FileTypeSuffixMap, FileTypeSuffixMap,
@ -34,6 +35,7 @@ import {
Operator, Operator,
TypesWithArray, TypesWithArray,
} from './constant'; } from './constant';
import { BeginFormSchemaType } from './form/begin-form';
import { DataOperationsFormSchemaType } from './form/data-operations-form'; import { DataOperationsFormSchemaType } from './form/data-operations-form';
import { ExtractorFormSchemaType } from './form/extractor-form'; import { ExtractorFormSchemaType } from './form/extractor-form';
import { HierarchicalMergerFormSchemaType } from './form/hierarchical-merger-form'; import { HierarchicalMergerFormSchemaType } from './form/hierarchical-merger-form';
@ -312,6 +314,41 @@ function transformDataOperationsParams(params: DataOperationsFormSchemaType) {
}; };
} }
export function transformArrayToObject(
list?: Array<{ key: string; value: string }>,
) {
if (!Array.isArray(list)) return {};
return list?.reduce<Record<string, any>>((pre, cur) => {
if (cur.key) {
pre[cur.key] = cur.value;
}
return pre;
}, {});
}
function transformBeginParams(params: BeginFormSchemaType) {
if (params.mode === AgentDialogueMode.Webhook) {
return {
...params,
security: {
...params.security,
ip_whitelist: params.security?.ip_whitelist.map((x) => x.value),
},
response: {
...params.response,
headers_template: transformArrayToObject(
params.response?.headers_template,
),
body_template: transformArrayToObject(params.response?.body_template),
},
};
}
return {
...params,
};
}
// construct a dsl based on the node information of the graph // construct a dsl based on the node information of the graph
export const buildDslComponentsByGraph = ( export const buildDslComponentsByGraph = (
nodes: RAGFlowNodeType[], nodes: RAGFlowNodeType[],
@ -361,6 +398,9 @@ export const buildDslComponentsByGraph = (
case Operator.DataOperations: case Operator.DataOperations:
params = transformDataOperationsParams(params); params = transformDataOperationsParams(params);
break; break;
case Operator.Begin:
params = transformBeginParams(params);
break;
default: default:
break; break;
} }

View file

@ -7,14 +7,18 @@ import { z } from 'zod';
import { CrossLanguageFormField } from '@/components/cross-language-form-field'; import { CrossLanguageFormField } from '@/components/cross-language-form-field';
import { FormContainer } from '@/components/form-container'; import { FormContainer } from '@/components/form-container';
import { import {
initialTopKValue, MetadataFilter,
MetadataFilterSchema,
} from '@/components/metadata-filter';
import {
RerankFormFields, RerankFormFields,
initialTopKValue,
topKSchema, topKSchema,
} from '@/components/rerank'; } from '@/components/rerank';
import { import {
SimilaritySliderFormField,
initialSimilarityThresholdValue, initialSimilarityThresholdValue,
initialVectorSimilarityWeightValue, initialVectorSimilarityWeightValue,
SimilaritySliderFormField,
similarityThresholdSchema, similarityThresholdSchema,
vectorSimilarityWeightSchema, vectorSimilarityWeightSchema,
} from '@/components/similarity-slider'; } from '@/components/similarity-slider';
@ -33,6 +37,7 @@ import { trim } from 'lodash';
import { Send } from 'lucide-react'; import { Send } from 'lucide-react';
import { useEffect } from 'react'; import { useEffect } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { useParams } from 'umi';
type TestingFormProps = Pick< type TestingFormProps = Pick<
ReturnType<typeof useTestRetrieval>, ReturnType<typeof useTestRetrieval>,
@ -45,6 +50,8 @@ export default function TestingForm({
setValues, setValues,
}: TestingFormProps) { }: TestingFormProps) {
const { t } = useTranslation(); const { t } = useTranslation();
const { id } = useParams();
const knowledgeBaseId = id;
const formSchema = z.object({ const formSchema = z.object({
question: z.string().min(1, { question: z.string().min(1, {
@ -54,6 +61,8 @@ export default function TestingForm({
...vectorSimilarityWeightSchema, ...vectorSimilarityWeightSchema,
...topKSchema, ...topKSchema,
use_kg: z.boolean().optional(), use_kg: z.boolean().optional(),
kb_ids: z.array(z.string()).optional(),
...MetadataFilterSchema,
}); });
const form = useForm<z.infer<typeof formSchema>>({ const form = useForm<z.infer<typeof formSchema>>({
@ -63,6 +72,7 @@ export default function TestingForm({
...initialVectorSimilarityWeightValue, ...initialVectorSimilarityWeightValue,
...initialTopKValue, ...initialTopKValue,
use_kg: false, use_kg: false,
kb_ids: [knowledgeBaseId],
}, },
}); });
@ -90,6 +100,7 @@ export default function TestingForm({
<CrossLanguageFormField <CrossLanguageFormField
name={'cross_languages'} name={'cross_languages'}
></CrossLanguageFormField> ></CrossLanguageFormField>
<MetadataFilter prefix=""></MetadataFilter>
</FormContainer> </FormContainer>
<FormField <FormField
control={form.control} control={form.control}

View file

@ -202,7 +202,7 @@ export default function SearchingView({
<div className="w-full flex flex-col"> <div className="w-full flex flex-col">
<div className="w-full highlightContent"> <div className="w-full highlightContent">
<ImageWithPopover <ImageWithPopover
id={chunk.img_id} id={chunk.image_id || chunk.img_id}
></ImageWithPopover> ></ImageWithPopover>
<Popover> <Popover>
<PopoverTrigger asChild> <PopoverTrigger asChild>