Merge remote-tracking branch 'upstream/main' into f016

This commit is contained in:
buua436 2025-12-11 14:58:58 +08:00
commit 91a81b735d
57 changed files with 2079 additions and 265 deletions

View file

@ -1,4 +1,6 @@
name: tests
permissions:
contents: read
on:
push:

View file

@ -351,7 +351,7 @@ class AdminCLI(Cmd):
def verify_admin(self, arguments: dict, single_command: bool):
self.host = arguments['host']
self.port = arguments['port']
print(f"Attempt to access ip: {self.host}, port: {self.port}")
print("Attempt to access server for admin login")
url = f"http://{self.host}:{self.port}/api/v1/admin/login"
attempt_count = 3
@ -390,7 +390,7 @@ class AdminCLI(Cmd):
print(f"Bad responsestatus: {response.status_code}, password is wrong")
except Exception as e:
print(str(e))
print(f"Can't access {self.host}, port: {self.port}")
print("Can't access server for admin login (connection failed)")
def _format_service_detail_table(self, data):
if isinstance(data, list):
@ -674,7 +674,7 @@ class AdminCLI(Cmd):
user_name: str = user_name_tree.children[0].strip("'\"")
password_tree: Tree = command['password']
password: str = password_tree.children[0].strip("'\"")
print(f"Alter user: {user_name}, password: {password}")
print(f"Alter user: {user_name}, password: ******")
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
response = self.session.put(url, json={'new_password': encrypt(password)})
res_json = response.json()
@ -689,7 +689,7 @@ class AdminCLI(Cmd):
password_tree: Tree = command['password']
password: str = password_tree.children[0].strip("'\"")
role: str = command['role']
print(f"Create user: {user_name}, password: {password}, role: {role}")
print(f"Create user: {user_name}, password: ******, role: {role}")
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
response = self.session.post(
url,
@ -951,7 +951,7 @@ def main():
args = cli.parse_connection_args(sys.argv)
if 'error' in args:
print(f"Error: {args['error']}")
print("Error: Invalid connection arguments")
return
if 'command' in args:
@ -960,7 +960,7 @@ def main():
return
if cli.verify_admin(args, single_command=True):
command: str = args['command']
print(f"Run single command: {command}")
# print(f"Run single command: {command}")
cli.run_single_command(command)
else:
if cli.verify_admin(args, single_command=False):

View file

@ -176,11 +176,11 @@ def login_verify(f):
"message": "Access denied",
"data": None
}), 200
except Exception as e:
error_msg = str(e)
except Exception:
logging.exception("An error occurred during admin login verification.")
return jsonify({
"code": 500,
"message": error_msg
"message": "An internal server error occurred."
}), 200
return f(*args, **kwargs)

View file

@ -136,6 +136,16 @@ class Retrieval(ToolBase, ABC):
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif self._param.meta_data_filter.get("method") == "semi_auto":
selected_keys = self._param.meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, query)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif self._param.meta_data_filter.get("method") == "manual":
filters = self._param.meta_data_filter["manual"]
for flt in filters:

View file

@ -342,7 +342,15 @@ async def test_db_connect():
f"UID={req['username']};"
f"PWD={req['password']};"
)
logging.info(conn_str)
redacted_conn_str = (
f"DATABASE={req['database']};"
f"HOSTNAME={req['host']};"
f"PORT={req['port']};"
f"PROTOCOL=TCPIP;"
f"UID={req['username']};"
f"PWD=****;"
)
logging.info(redacted_conn_str)
conn = ibm_db.connect(conn_str, "", "")
stmt = ibm_db.exec_immediate(conn, "SELECT 1 FROM sysibm.sysdummy1")
ibm_db.fetch_assoc(stmt)

View file

@ -327,10 +327,44 @@ async def retrieval_test():
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(user_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"]
else:
meta_data_filter = req.get("meta_data_filter")
if meta_data_filter:
metas = DocumentService.get_meta_by_kbs(kb_ids)
if meta_data_filter.get("method") == "auto":
chat_mdl = LLMBundle(user_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(user_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"]
tenants = UserTenantService.query(user_id=user_id)
for kb_id in kb_ids:

View file

@ -435,7 +435,7 @@ async def mindmap():
kb_ids.extend(req["kb_ids"])
kb_ids = list(set(kb_ids))
mind_map = gen_mindmap(req["question"], kb_ids, search_app.get("tenant_id", current_user.id), search_config)
mind_map = await gen_mindmap(req["question"], kb_ids, search_app.get("tenant_id", current_user.id), search_config)
if "error" in mind_map:
return server_error_response(Exception(mind_map["error"]))
return get_json_result(data=mind_map)

View file

@ -27,6 +27,7 @@ from api.db import VALID_FILE_TYPES, FileType
from api.db.db_models import Task
from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService, doc_upload_and_parse
from api.db.services.dialog_service import meta_filter, convert_conditions
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
@ -246,9 +247,19 @@ async def list_docs():
return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")
suffix = req.get("suffix", [])
metadata_condition = req.get("metadata_condition", {}) or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_data_error_result(message="metadata_condition must be an object.")
doc_ids_filter = None
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([kb_id])
doc_ids_filter = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
if metadata_condition.get("conditions") and not doc_ids_filter:
return get_json_result(data={"total": 0, "docs": []})
try:
docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix)
docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix, doc_ids_filter)
if create_time_from or create_time_to:
filtered_docs = []
@ -319,6 +330,87 @@ async def doc_infos():
return get_json_result(data=list(docs.dicts()))
@manager.route("/metadata/summary", methods=["POST"]) # noqa: F821
@login_required
async def metadata_summary():
req = await get_request_json()
kb_id = req.get("kb_id")
if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
tenants = UserTenantService.query(user_id=current_user.id)
for tenant in tenants:
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
break
else:
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
try:
summary = DocumentService.get_metadata_summary(kb_id)
return get_json_result(data={"summary": summary})
except Exception as e:
return server_error_response(e)
@manager.route("/metadata/update", methods=["POST"]) # noqa: F821
@login_required
async def metadata_update():
req = await get_request_json()
kb_id = req.get("kb_id")
if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=RetCode.ARGUMENT_ERROR)
tenants = UserTenantService.query(user_id=current_user.id)
for tenant in tenants:
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
break
else:
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
selector = req.get("selector", {}) or {}
updates = req.get("updates", []) or []
deletes = req.get("deletes", []) or []
if not isinstance(selector, dict):
return get_json_result(data=False, message="selector must be an object.", code=RetCode.ARGUMENT_ERROR)
if not isinstance(updates, list) or not isinstance(deletes, list):
return get_json_result(data=False, message="updates and deletes must be lists.", code=RetCode.ARGUMENT_ERROR)
metadata_condition = selector.get("metadata_condition", {}) or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_json_result(data=False, message="metadata_condition must be an object.", code=RetCode.ARGUMENT_ERROR)
document_ids = selector.get("document_ids", []) or []
if document_ids and not isinstance(document_ids, list):
return get_json_result(data=False, message="document_ids must be a list.", code=RetCode.ARGUMENT_ERROR)
for upd in updates:
if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
return get_json_result(data=False, message="Each update requires key and value.", code=RetCode.ARGUMENT_ERROR)
for d in deletes:
if not isinstance(d, dict) or not d.get("key"):
return get_json_result(data=False, message="Each delete requires key.", code=RetCode.ARGUMENT_ERROR)
kb_doc_ids = KnowledgebaseService.list_documents_by_ids([kb_id])
target_doc_ids = set(kb_doc_ids)
if document_ids:
invalid_ids = set(document_ids) - set(kb_doc_ids)
if invalid_ids:
return get_json_result(data=False, message=f"These documents do not belong to dataset {kb_id}: {', '.join(invalid_ids)}", code=RetCode.ARGUMENT_ERROR)
target_doc_ids = set(document_ids)
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([kb_id])
filtered_ids = set(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
target_doc_ids = target_doc_ids & filtered_ids
if metadata_condition.get("conditions") and not target_doc_ids:
return get_json_result(data={"updated": 0, "matched_docs": 0})
target_doc_ids = list(target_doc_ids)
updated = DocumentService.batch_update_metadata(kb_id, target_doc_ids, updates, deletes)
return get_json_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
@manager.route("/thumbnails", methods=["GET"]) # noqa: F821
# @login_required
def thumbnails():
@ -698,7 +790,10 @@ async def set_meta():
if not isinstance(meta, dict):
return get_json_result(data=False, message="Only dictionary type supported.", code=RetCode.ARGUMENT_ERROR)
for k, v in meta.items():
if not isinstance(v, str) and not isinstance(v, int) and not isinstance(v, float):
if isinstance(v, list):
if not all(isinstance(i, (str, int, float)) for i in v):
return get_json_result(data=False, message=f"The type is not supported in list: {v}", code=RetCode.ARGUMENT_ERROR)
elif not isinstance(v, (str, int, float)):
return get_json_result(data=False, message=f"The type is not supported: {v}", code=RetCode.ARGUMENT_ERROR)
except Exception as e:
return get_json_result(data=False, message=f"Json syntax error: {e}", code=RetCode.ARGUMENT_ERROR)

View file

@ -14,6 +14,7 @@
# limitations under the License.
#
import datetime
import json
import logging
import pathlib
import re
@ -551,13 +552,29 @@ def list_docs(dataset_id, tenant_id):
run_status = q.getlist("run")
create_time_from = int(q.get("create_time_from", 0))
create_time_to = int(q.get("create_time_to", 0))
metadata_condition_raw = q.get("metadata_condition")
metadata_condition = {}
if metadata_condition_raw:
try:
metadata_condition = json.loads(metadata_condition_raw)
except Exception:
return get_error_data_result(message="metadata_condition must be valid JSON.")
if metadata_condition and not isinstance(metadata_condition, dict):
return get_error_data_result(message="metadata_condition must be an object.")
# map run status (text or numeric) - align with API parameter
run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status]
doc_ids_filter = None
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([dataset_id])
doc_ids_filter = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
if metadata_condition.get("conditions") and not doc_ids_filter:
return get_result(data={"total": 0, "docs": []})
docs, total = DocumentService.get_list(
dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted
dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted, doc_ids_filter
)
# time range filter (0 means no bound)
@ -586,6 +603,70 @@ def list_docs(dataset_id, tenant_id):
return get_result(data={"total": total, "docs": output_docs})
@manager.route("/datasets/<dataset_id>/metadata/summary", methods=["GET"]) # noqa: F821
@token_required
def metadata_summary(dataset_id, tenant_id):
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
try:
summary = DocumentService.get_metadata_summary(dataset_id)
return get_result(data={"summary": summary})
except Exception as e:
return server_error_response(e)
@manager.route("/datasets/<dataset_id>/metadata/update", methods=["POST"]) # noqa: F821
@token_required
async def metadata_batch_update(dataset_id, tenant_id):
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
req = await get_request_json()
selector = req.get("selector", {}) or {}
updates = req.get("updates", []) or []
deletes = req.get("deletes", []) or []
if not isinstance(selector, dict):
return get_error_data_result(message="selector must be an object.")
if not isinstance(updates, list) or not isinstance(deletes, list):
return get_error_data_result(message="updates and deletes must be lists.")
metadata_condition = selector.get("metadata_condition", {}) or {}
if metadata_condition and not isinstance(metadata_condition, dict):
return get_error_data_result(message="metadata_condition must be an object.")
document_ids = selector.get("document_ids", []) or []
if document_ids and not isinstance(document_ids, list):
return get_error_data_result(message="document_ids must be a list.")
for upd in updates:
if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
return get_error_data_result(message="Each update requires key and value.")
for d in deletes:
if not isinstance(d, dict) or not d.get("key"):
return get_error_data_result(message="Each delete requires key.")
kb_doc_ids = KnowledgebaseService.list_documents_by_ids([dataset_id])
target_doc_ids = set(kb_doc_ids)
if document_ids:
invalid_ids = set(document_ids) - set(kb_doc_ids)
if invalid_ids:
return get_error_data_result(message=f"These documents do not belong to dataset {dataset_id}: {', '.join(invalid_ids)}")
target_doc_ids = set(document_ids)
if metadata_condition:
metas = DocumentService.get_flatted_meta_by_kbs([dataset_id])
filtered_ids = set(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
target_doc_ids = target_doc_ids & filtered_ids
if metadata_condition.get("conditions") and not target_doc_ids:
return get_result(data={"updated": 0, "matched_docs": 0})
target_doc_ids = list(target_doc_ids)
updated = DocumentService.batch_update_metadata(dataset_id, target_doc_ids, updates, deletes)
return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
@token_required
async def delete(tenant_id, dataset_id):

View file

@ -984,10 +984,44 @@ async def retrieval_test_embedded():
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, _question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"]
else:
meta_data_filter = req.get("meta_data_filter")
if meta_data_filter:
metas = DocumentService.get_meta_by_kbs(kb_ids)
if meta_data_filter.get("method") == "auto":
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT)
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not local_doc_ids:
local_doc_ids = None
elif meta_data_filter.get("method") == "manual":
local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not local_doc_ids:
local_doc_ids = ["-999"]
tenants = UserTenantService.query(user_id=tenant_id)
for kb_id in kb_ids:
@ -1135,7 +1169,7 @@ async def mindmap():
search_id = req.get("search_id", "")
search_app = SearchService.get_detail(search_id) if search_id else {}
mind_map = gen_mindmap(req["question"], req["kb_ids"], tenant_id, search_app.get("search_config", {}))
mind_map =await gen_mindmap(req["question"], req["kb_ids"], tenant_id, search_app.get("search_config", {}))
if "error" in mind_map:
return server_error_response(Exception(mind_map["error"]))
return get_json_result(data=mind_map)

View file

@ -73,7 +73,7 @@ def init_superuser(nickname=DEFAULT_SUPERUSER_NICKNAME, email=DEFAULT_SUPERUSER_
UserTenantService.insert(**usr_tenant)
TenantLLMService.insert_many(tenant_llm)
logging.info(
f"Super user initialized. email: {email}, password: {password}. Changing the password after login is strongly recommended.")
f"Super user initialized. email: {email},A default password has been set; changing the password after login is strongly recommended.")
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
msg = chat_mdl.chat(system="", history=[

View file

@ -273,7 +273,7 @@ def delete_user_data(user_id: str) -> dict:
except Exception as e:
logging.exception(e)
return {"success": False, "message": f"Error: {str(e)}. Already done:\n{done_msg}"}
return {"success": False, "message": "An internal error occurred during user deletion. Some operations may have completed.","details": done_msg}
def delete_user_agents(user_id: str) -> dict:

View file

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import binascii
import logging
import re
@ -426,6 +425,15 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
attachments.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not attachments:
attachments = None
elif dialog.meta_data_filter.get("method") == "semi_auto":
selected_keys = dialog.meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, questions[-1])
attachments.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not attachments:
attachments = None
elif dialog.meta_data_filter.get("method") == "manual":
conds = dialog.meta_data_filter["manual"]
attachments.extend(meta_filter(metas, conds, dialog.meta_data_filter.get("logic", "and")))
@ -835,6 +843,15 @@ async def async_ask(question, kb_ids, tenant_id, chat_llm_name=None, search_conf
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "manual":
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not doc_ids:
@ -887,7 +904,7 @@ async def async_ask(question, kb_ids, tenant_id, chat_llm_name=None, search_conf
yield decorate_answer(answer)
def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
async def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
meta_data_filter = search_config.get("meta_data_filter", {})
doc_ids = search_config.get("doc_ids", [])
rerank_id = search_config.get("rerank_id", "")
@ -910,6 +927,15 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "semi_auto":
selected_keys = meta_data_filter.get("semi_auto", [])
if selected_keys:
filtered_metas = {key: metas[key] for key in selected_keys if key in metas}
if filtered_metas:
filters: dict = gen_meta_filter(chat_mdl, filtered_metas, question)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "manual":
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if meta_data_filter["manual"] and not doc_ids:
@ -931,5 +957,5 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
rank_feature=label_question(question, kbs),
)
mindmap = MindMapExtractor(chat_mdl)
mind_map = asyncio.run(mindmap([c["content_with_weight"] for c in ranks["chunks"]]))
mind_map = await mindmap([c["content_with_weight"] for c in ranks["chunks"]])
return mind_map.output

View file

@ -79,7 +79,7 @@ class DocumentService(CommonService):
@classmethod
@DB.connection_context()
def get_list(cls, kb_id, page_number, items_per_page,
orderby, desc, keywords, id, name, suffix=None, run = None):
orderby, desc, keywords, id, name, suffix=None, run = None, doc_ids=None):
fields = cls.get_cls_model_fields()
docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on = (File2Document.document_id == cls.model.id))\
.join(File, on = (File.id == File2Document.file_id))\
@ -96,6 +96,8 @@ class DocumentService(CommonService):
docs = docs.where(
fn.LOWER(cls.model.name).contains(keywords.lower())
)
if doc_ids:
docs = docs.where(cls.model.id.in_(doc_ids))
if suffix:
docs = docs.where(cls.model.suffix.in_(suffix))
if run:
@ -123,7 +125,7 @@ class DocumentService(CommonService):
@classmethod
@DB.connection_context()
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
orderby, desc, keywords, run_status, types, suffix):
orderby, desc, keywords, run_status, types, suffix, doc_ids=None):
fields = cls.get_cls_model_fields()
if keywords:
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
@ -143,6 +145,8 @@ class DocumentService(CommonService):
.join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
.where(cls.model.kb_id == kb_id)
if doc_ids:
docs = docs.where(cls.model.id.in_(doc_ids))
if run_status:
docs = docs.where(cls.model.run.in_(run_status))
if types:
@ -644,6 +648,13 @@ class DocumentService(CommonService):
@classmethod
@DB.connection_context()
def get_meta_by_kbs(cls, kb_ids):
"""
Legacy metadata aggregator (backward-compatible).
- Does NOT expand list values and a list is kept as one string key.
Example: {"tags": ["foo","bar"]} -> meta["tags"]["['foo', 'bar']"] = [doc_id]
- Expects meta_fields is a dict.
Use when existing callers rely on the old list-as-string semantics.
"""
fields = [
cls.model.id,
cls.model.meta_fields,
@ -660,6 +671,162 @@ class DocumentService(CommonService):
meta[k][v].append(doc_id)
return meta
@classmethod
@DB.connection_context()
def get_flatted_meta_by_kbs(cls, kb_ids):
"""
- Parses stringified JSON meta_fields when possible and skips non-dict or unparsable values.
- Expands list values into individual entries.
Example: {"tags": ["foo","bar"], "author": "alice"} ->
meta["tags"]["foo"] = [doc_id], meta["tags"]["bar"] = [doc_id], meta["author"]["alice"] = [doc_id]
Prefer for metadata_condition filtering and scenarios that must respect list semantics.
"""
fields = [
cls.model.id,
cls.model.meta_fields,
]
meta = {}
for r in cls.model.select(*fields).where(cls.model.kb_id.in_(kb_ids)):
doc_id = r.id
meta_fields = r.meta_fields or {}
if isinstance(meta_fields, str):
try:
meta_fields = json.loads(meta_fields)
except Exception:
continue
if not isinstance(meta_fields, dict):
continue
for k, v in meta_fields.items():
if k not in meta:
meta[k] = {}
values = v if isinstance(v, list) else [v]
for vv in values:
if vv is None:
continue
sv = str(vv)
if sv not in meta[k]:
meta[k][sv] = []
meta[k][sv].append(doc_id)
return meta
@classmethod
@DB.connection_context()
def get_metadata_summary(cls, kb_id):
fields = [cls.model.id, cls.model.meta_fields]
summary = {}
for r in cls.model.select(*fields).where(cls.model.kb_id == kb_id):
meta_fields = r.meta_fields or {}
if isinstance(meta_fields, str):
try:
meta_fields = json.loads(meta_fields)
except Exception:
continue
if not isinstance(meta_fields, dict):
continue
for k, v in meta_fields.items():
values = v if isinstance(v, list) else [v]
for vv in values:
if not vv:
continue
sv = str(vv)
if k not in summary:
summary[k] = {}
summary[k][sv] = summary[k].get(sv, 0) + 1
return {k: sorted([(val, cnt) for val, cnt in v.items()], key=lambda x: x[1], reverse=True) for k, v in summary.items()}
@classmethod
@DB.connection_context()
def batch_update_metadata(cls, kb_id, doc_ids, updates=None, deletes=None):
updates = updates or []
deletes = deletes or []
if not doc_ids:
return 0
def _normalize_meta(meta):
if isinstance(meta, str):
try:
meta = json.loads(meta)
except Exception:
return {}
if not isinstance(meta, dict):
return {}
return deepcopy(meta)
def _str_equal(a, b):
return str(a) == str(b)
def _apply_updates(meta):
changed = False
for upd in updates:
key = upd.get("key")
if not key or key not in meta:
continue
new_value = upd.get("value")
match_value = upd.get("match", new_value)
if isinstance(meta[key], list):
replaced = False
new_list = []
for item in meta[key]:
if match_value and _str_equal(item, match_value):
new_list.append(new_value)
replaced = True
else:
new_list.append(item)
if replaced:
meta[key] = new_list
changed = True
else:
if not match_value:
continue
if _str_equal(meta[key], match_value):
meta[key] = new_value
changed = True
return changed
def _apply_deletes(meta):
changed = False
for d in deletes:
key = d.get("key")
if not key or key not in meta:
continue
value = d.get("value", None)
if isinstance(meta[key], list):
if value is None:
del meta[key]
changed = True
continue
new_list = [item for item in meta[key] if not _str_equal(item, value)]
if len(new_list) != len(meta[key]):
if new_list:
meta[key] = new_list
else:
del meta[key]
changed = True
else:
if value is None or _str_equal(meta[key], value):
del meta[key]
changed = True
return changed
updated_docs = 0
with DB.atomic():
rows = cls.model.select(cls.model.id, cls.model.meta_fields).where(
(cls.model.id.in_(doc_ids)) & (cls.model.kb_id == kb_id)
)
for r in rows:
meta = _normalize_meta(r.meta_fields or {})
original_meta = deepcopy(meta)
changed = _apply_updates(meta)
changed = _apply_deletes(meta) or changed
if changed and meta != original_meta:
cls.model.update(
meta_fields=meta,
update_time=current_timestamp(),
update_date=get_format_time()
).where(cls.model.id == r.id).execute()
updated_docs += 1
return updated_docs
@classmethod
@DB.connection_context()
def update_progress(cls):

View file

@ -109,7 +109,7 @@ class LLMBundle(LLM4Tenant):
llm_name = getattr(self, "llm_name", None)
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
logging.error("LLMBundle.encode can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
if self.langfuse:
generation.update(usage_details={"total_tokens": used_tokens})
@ -124,7 +124,7 @@ class LLMBundle(LLM4Tenant):
emd, used_tokens = self.mdl.encode_queries(query)
llm_name = getattr(self, "llm_name", None)
if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
logging.error("LLMBundle.encode_queries can't update token usage for <tenant redacted>/EMBEDDING used_tokens: {}".format(used_tokens))
if self.langfuse:
generation.update(usage_details={"total_tokens": used_tokens})

View file

@ -1110,7 +1110,10 @@ def _make_attachment_link(
) -> str | None:
download_link = ""
if "api.atlassian.com" in confluence_client.url:
from urllib.parse import urlparse
netloc =urlparse(confluence_client.url).hostname
if netloc == "api.atlassian.com" or (netloc and netloc.endswith(".api.atlassian.com")):
# if "api.atlassian.com" in confluence_client.url:
# https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content---attachments/#api-wiki-rest-api-content-id-child-attachment-attachmentid-download-get
if not parent_content_id:
logging.warning(

View file

@ -135,7 +135,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
except ValueError as exc:
raise ConnectorValidationError(str(exc)) from exc
else:
logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.")
logger.warning("[Jira] Scoped token requested but Jira base URL does not appear to be an Atlassian Cloud domain; scoped token ignored.")
user_email = credentials.get("jira_user_email") or credentials.get("username")
api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
@ -245,7 +245,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
while True:
attempt += 1
jql = self._build_jql(attempt_start, end)
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}")
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer})")
try:
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
except Exception as exc:
@ -927,9 +927,6 @@ def main(config: dict[str, Any] | None = None) -> None:
base_url = config.get("base_url")
credentials = config.get("credentials", {})
print(f"[Jira] {config=}", flush=True)
print(f"[Jira] {credentials=}", flush=True)
if not base_url:
raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):

View file

@ -16,7 +16,9 @@ import logging
import os
import time
from typing import Any, Dict, Optional
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
from common import settings
import httpx
logger = logging.getLogger(__name__)
@ -52,6 +54,55 @@ def _get_delay(backoff_factor: float, attempt: int) -> float:
return backoff_factor * (2**attempt)
# List of sensitive parameters to redact from URLs before logging
_SENSITIVE_QUERY_KEYS = {"client_secret", "secret", "code", "access_token", "refresh_token", "password", "token", "app_secret"}
def _redact_sensitive_url_params(url: str) -> str:
try:
parsed = urlparse(url)
if not parsed.query:
return url
clean_query = []
for k, v in parse_qsl(parsed.query, keep_blank_values=True):
if k.lower() in _SENSITIVE_QUERY_KEYS:
clean_query.append((k, "***REDACTED***"))
else:
clean_query.append((k, v))
new_query = urlencode(clean_query, doseq=True)
redacted_url = urlunparse(parsed._replace(query=new_query))
return redacted_url
except Exception:
return url
def _is_sensitive_url(url: str) -> bool:
"""Return True if URL is one of the configured OAuth endpoints."""
# Collect known sensitive endpoint URLs from settings
oauth_urls = set()
# GitHub OAuth endpoints
try:
if settings.GITHUB_OAUTH is not None:
url_val = settings.GITHUB_OAUTH.get("url")
if url_val:
oauth_urls.add(url_val)
except Exception:
pass
# Feishu OAuth endpoints
try:
if settings.FEISHU_OAUTH is not None:
for k in ("app_access_token_url", "user_access_token_url"):
url_val = settings.FEISHU_OAUTH.get(k)
if url_val:
oauth_urls.add(url_val)
except Exception:
pass
# Defensive normalization: compare only scheme+netloc+path
url_obj = urlparse(url)
for sensitive_url in oauth_urls:
sensitive_obj = urlparse(sensitive_url)
if (url_obj.scheme, url_obj.netloc, url_obj.path) == (sensitive_obj.scheme, sensitive_obj.netloc, sensitive_obj.path):
return True
return False
async def async_request(
method: str,
url: str,
@ -93,20 +144,23 @@ async def async_request(
method=method, url=url, headers=headers, **kwargs
)
duration = time.monotonic() - start
log_url = "<SENSITIVE ENDPOINT>" if _is_sensitive_url else _redact_sensitive_url_params(url)
logger.debug(
f"async_request {method} {url} -> {response.status_code} in {duration:.3f}s"
f"async_request {method} {log_url} -> {response.status_code} in {duration:.3f}s"
)
return response
except httpx.RequestError as exc:
last_exc = exc
if attempt >= retries:
log_url = "<SENSITIVE ENDPOINT>" if _is_sensitive_url else _redact_sensitive_url_params(url)
logger.warning(
f"async_request exhausted retries for {method} {url}: {exc}"
f"async_request exhausted retries for {method} {log_url}"
)
raise
delay = _get_delay(backoff_factor, attempt)
log_url = "<SENSITIVE ENDPOINT>" if _is_sensitive_url else _redact_sensitive_url_params(url)
logger.warning(
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {url}: {exc}; retrying in {delay:.2f}s"
f"async_request attempt {attempt + 1}/{retries + 1} failed for {method} {log_url}; retrying in {delay:.2f}s"
)
await asyncio.sleep(delay)
raise last_exc # pragma: no cover

View file

@ -369,6 +369,13 @@
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "deepseek-v3.2",
"tags": "LLM,CHAT,128K",
"max_tokens": 128000,
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "deepseek-r1",
"tags": "LLM,CHAT,64K",

View file

@ -106,7 +106,7 @@ class MinerUParser(RAGFlowPdfParser):
def check_installation(self, backend: str = "pipeline", server_url: Optional[str] = None) -> tuple[bool, str]:
reason = ""
valid_backends = ["pipeline", "vlm-http-client", "vlm-transformers", "vlm-vllm-engine"]
valid_backends = ["pipeline", "vlm-http-client", "vlm-transformers", "vlm-vllm-engine", "vlm-mlx-engine"]
if backend not in valid_backends:
reason = "[MinerU] Invalid backend '{backend}'. Valid backends are: {valid_backends}"
self.logger.warning(reason)

View file

@ -1477,7 +1477,7 @@ Failure:
### List documents
**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}`
**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}&metadata_condition={json}`
Lists documents in a specified dataset.
@ -1492,6 +1492,7 @@ Lists documents in a specified dataset.
##### Request examples
**A basic request with pagination:**
```bash
curl --request GET \
--url http://{address}/api/v1/datasets/{dataset_id}/documents?page=1&page_size=10 \
@ -1534,6 +1535,11 @@ curl --request GET \
- `3` / `DONE`: Document processing completed successfully
- `4` / `FAIL`: Document processing failed
Defaults to all statuses.
- `metadata_condition`: (*Filter parameter*), `object` (JSON in query)
Optional metadata filter applied to documents when `document_ids` is not provided. Uses the same structure as retrieval:
- `logic`: `"and"` (default) or `"or"`
- `conditions`: array of `{ "name": string, "comparison_operator": string, "value": string }`
- `comparison_operator` supports: `is`, `not is`, `contains`, `not contains`, `in`, `not in`, `start with`, `end with`, `>`, `<`, `≥`, `≤`, `empty`, `not empty`
##### Usage examples
@ -1545,6 +1551,15 @@ curl --request GET \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
**Filter by metadata (query JSON):**
```bash
curl -G \
--url "http://localhost:9222/api/v1/datasets/{{KB_ID}}/documents" \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data-urlencode 'metadata_condition={"logic":"and","conditions":[{"name":"tags","comparison_operator":"is","value":"bar"},{"name":"author","comparison_operator":"is","value":"alice"}]}'
```
#### Response
Success:
@ -2088,6 +2103,108 @@ Failure:
---
### Dataset metadata summary
**GET** `/api/v1/datasets/{dataset_id}/metadata/summary`
Aggregates metadata values across all documents in a dataset.
#### Request
- Method: GET
- URL: `/api/v1/datasets/{dataset_id}/metadata/summary`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Response
Success:
```json
{
"code": 0,
"data": {
"summary": {
"tags": [["bar", 2], ["foo", 1], ["baz", 1]],
"author": [["alice", 2], ["bob", 1]]
}
}
}
```
---
### Dataset metadata update
**POST** `/api/v1/datasets/{dataset_id}/metadata/update`
Batch update or delete document-level metadata in a dataset. If both `document_ids` and `metadata_condition` are omitted, all documents in the dataset are selected. When both are provided, the intersection is used.
#### Request
- Method: POST
- URL: `/api/v1/datasets/{dataset_id}/metadata/update`
- Headers:
- `'content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `selector`: `object`, optional
- `document_ids`: `list[string]`, optional
- `metadata_condition`: `object`, optional
- `logic`: `"and"` (default) or `"or"`
- `conditions`: array of `{ "name": string, "comparison_operator": string, "value": string }`
- `comparison_operator` supports: `is`, `not is`, `contains`, `not contains`, `in`, `not in`, `start with`, `end with`, `>`, `<`, `≥`, `≤`, `empty`, `not empty`
- `updates`: `array`, optional
- items: `{ "key": string, "value": any, "match": any (optional) }`
- For lists: replace elements equal to `match` (or `value` when `match` omitted) with `value`.
- For scalars: replace when current value equals `match` (or `value` when `match` omitted).
- `deletes`: `array`, optional
- items: `{ "key": string, "value": any (optional) }`
- For lists: remove elements equal to `value`; if list becomes empty, remove the key.
- For scalars: remove the key when `value` matches or when `value` is omitted.
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/datasets/{dataset_id}/metadata/update \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"selector": {
"metadata_condition": {
"logic": "and",
"conditions": [
{"name": "author", "comparison_operator": "is", "value": "alice"}
]
}
},
"updates": [
{"key": "tags", "match": "foo", "value": "foo_new"}
],
"deletes": [
{"key": "obsolete_key"},
{"key": "author", "value": "alice"}
]
}'
```
##### Response
Success:
```json
{
"code": 0,
"data": {
"updated": 1,
"matched_docs": 2
}
}
```
---
### Retrieve chunks
**POST** `/api/v1/retrieval`
@ -2117,6 +2234,7 @@ Retrieves chunks from specified datasets.
- `"metadata_condition"`: `object`
- `"use_kg"`: `boolean`
- `"toc_enhance"`: `boolean`
##### Request example
```bash
@ -2189,7 +2307,7 @@ curl --request POST \
- `"conditions"`: (*Body parameter*), `array`
A list of metadata filter conditions.
- `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details.
- `comparison_operator`: `string` - The comparison operator. Can be one of:
- `comparison_operator`: `string` - The comparison operator. Can be one of:
- `"contains"`
- `"not contains"`
- `"start with"`
@ -2203,7 +2321,6 @@ curl --request POST \
- `"≤"`
- `"value"`: `string` - The value to compare.
#### Response
Success:
@ -4450,7 +4567,9 @@ Failure:
---
### System
---
### Check system health
**GET** `/v1/system/healthz`
@ -4519,6 +4638,7 @@ Content-Type: application/json
```
Explanation:
- Each service is reported as "ok" or "nok".
- The top-level `status` reflects overall health.
- If any service is "nok", detailed error info appears in `_meta`.

View file

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import json
import logging
from collections import defaultdict
@ -44,7 +43,7 @@ class KGSearch(Dealer):
return response
def query_rewrite(self, llm, question, idxnms, kb_ids):
ty2ents = asyncio.run(get_entity_type2samples(idxnms, kb_ids))
ty2ents = get_entity_type2samples(idxnms, kb_ids)
hint_prompt = PROMPTS["minirag_query2kwd"].format(query=question,
TYPE_POOL=json.dumps(ty2ents, ensure_ascii=False, indent=2))
result = self._chat(llm, hint_prompt, [{"role": "user", "content": "Output:"}], {})

View file

@ -626,8 +626,8 @@ def merge_tuples(list1, list2):
return result
async def get_entity_type2samples(idxnms, kb_ids: list):
es_res = await asyncio.to_thread(settings.retriever.search,{"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]},idxnms,kb_ids)
def get_entity_type2samples(idxnms, kb_ids: list):
es_res = settings.retriever.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]},idxnms,kb_ids)
res = defaultdict(list)
for id in es_res.ids:

View file

@ -714,4 +714,4 @@ async def main():
if __name__ == "__main__":
faulthandler.enable()
init_root_logger(CONSUMER_NAME)
asyncio.run(main)
asyncio.run(main())

View file

@ -17,13 +17,16 @@ import json
import logging
import os
import re
import threading
import time
from typing import Any, Optional
import numpy as np
from elasticsearch_dsl import Q, Search
from pydantic import BaseModel
from pymysql.converters import escape_string
from pyobvector import ObVecClient, FtsIndexParam, FtsParser, ARRAY, VECTOR
from pyobvector.client import ClusterVersionException
from pyobvector.client.hybrid_search import HybridSearch
from pyobvector.util import ObVersion
from sqlalchemy import text, Column, String, Integer, JSON, Double, Row, Table
@ -106,17 +109,6 @@ index_columns: list[str] = [
"removed_kwd",
]
fulltext_search_columns: list[str] = [
"docnm_kwd",
"content_with_weight",
"title_tks",
"title_sm_tks",
"important_tks",
"question_tks",
"content_ltks",
"content_sm_ltks"
]
fts_columns_origin: list[str] = [
"docnm_kwd^10",
"content_with_weight",
@ -138,7 +130,7 @@ fulltext_index_name_template = "fts_idx_%s"
# MATCH AGAINST: https://www.oceanbase.com/docs/common-oceanbase-database-cn-1000000002017607
fulltext_search_template = "MATCH (%s) AGAINST ('%s' IN NATURAL LANGUAGE MODE)"
# cosine_distance: https://www.oceanbase.com/docs/common-oceanbase-database-cn-1000000002012938
vector_search_template = "cosine_distance(%s, %s)"
vector_search_template = "cosine_distance(%s, '%s')"
class SearchResult(BaseModel):
@ -362,18 +354,28 @@ class OBConnection(DocStoreConnection):
port = mysql_config.get("port", 2881)
self.username = mysql_config.get("user", "root@test")
self.password = mysql_config.get("password", "infini_rag_flow")
max_connections = mysql_config.get("max_connections", 300)
else:
logger.info("Use customized config to create OceanBase connection.")
host = ob_config.get("host", "localhost")
port = ob_config.get("port", 2881)
self.username = ob_config.get("user", "root@test")
self.password = ob_config.get("password", "infini_rag_flow")
max_connections = ob_config.get("max_connections", 300)
self.db_name = ob_config.get("db_name", "test")
self.uri = f"{host}:{port}"
logger.info(f"Use OceanBase '{self.uri}' as the doc engine.")
# Set the maximum number of connections that can be created above the pool_size.
# By default, this is half of max_connections, but at least 10.
# This allows the pool to handle temporary spikes in demand without exhausting resources.
max_overflow = int(os.environ.get("OB_MAX_OVERFLOW", max(max_connections // 2, 10)))
# Set the number of seconds to wait before giving up when trying to get a connection from the pool.
# Default is 30 seconds, but can be overridden with the OB_POOL_TIMEOUT environment variable.
pool_timeout = int(os.environ.get("OB_POOL_TIMEOUT", "30"))
for _ in range(ATTEMPT_TIME):
try:
self.client = ObVecClient(
@ -383,6 +385,9 @@ class OBConnection(DocStoreConnection):
db_name=self.db_name,
pool_pre_ping=True,
pool_recycle=3600,
pool_size=max_connections,
max_overflow=max_overflow,
pool_timeout=pool_timeout,
)
break
except Exception as e:
@ -398,6 +403,37 @@ class OBConnection(DocStoreConnection):
self._check_ob_version()
self._try_to_update_ob_query_timeout()
self.es = None
if self.enable_hybrid_search:
try:
self.es = HybridSearch(
uri=self.uri,
user=self.username,
password=self.password,
db_name=self.db_name,
pool_pre_ping=True,
pool_recycle=3600,
pool_size=max_connections,
max_overflow=max_overflow,
pool_timeout=pool_timeout,
)
logger.info("OceanBase Hybrid Search feature is enabled")
except ClusterVersionException as e:
logger.info("Failed to initialize HybridSearch client, fallback to use SQL", exc_info=e)
self.es = None
if self.es is not None and self.search_original_content:
logger.info("HybridSearch is enabled, forcing search_original_content to False")
self.search_original_content = False
# Determine which columns to use for full-text search dynamically:
# If HybridSearch is enabled (self.es is not None), we must use tokenized columns (fts_columns_tks)
# for compatibility and performance with HybridSearch. Otherwise, we use the original content columns
# (fts_columns_origin), which may be controlled by an environment variable.
self.fulltext_search_columns = fts_columns_origin if self.search_original_content else fts_columns_tks
self._table_exists_cache: set[str] = set()
self._table_exists_cache_lock = threading.RLock()
logger.info(f"OceanBase {self.uri} is healthy.")
def _check_ob_version(self):
@ -417,18 +453,6 @@ class OBConnection(DocStoreConnection):
f"The version of OceanBase needs to be higher than or equal to 4.3.5.1, current version is {version_str}"
)
self.es = None
if not ob_version < ObVersion.from_db_version_nums(4, 4, 1, 0) and self.enable_hybrid_search:
self.es = HybridSearch(
uri=self.uri,
user=self.username,
password=self.password,
db_name=self.db_name,
pool_pre_ping=True,
pool_recycle=3600,
)
logger.info("OceanBase Hybrid Search feature is enabled")
def _try_to_update_ob_query_timeout(self):
try:
val = self._get_variable_value("ob_query_timeout")
@ -455,9 +479,19 @@ class OBConnection(DocStoreConnection):
return os.getenv(var, default).lower() in ['true', '1', 'yes', 'y']
self.enable_fulltext_search = is_true('ENABLE_FULLTEXT_SEARCH', 'true')
logger.info(f"ENABLE_FULLTEXT_SEARCH={self.enable_fulltext_search}")
self.use_fulltext_hint = is_true('USE_FULLTEXT_HINT', 'true')
logger.info(f"USE_FULLTEXT_HINT={self.use_fulltext_hint}")
self.search_original_content = is_true("SEARCH_ORIGINAL_CONTENT", 'true')
logger.info(f"SEARCH_ORIGINAL_CONTENT={self.search_original_content}")
self.enable_hybrid_search = is_true('ENABLE_HYBRID_SEARCH', 'false')
logger.info(f"ENABLE_HYBRID_SEARCH={self.enable_hybrid_search}")
self.use_fulltext_first_fusion_search = is_true('USE_FULLTEXT_FIRST_FUSION_SEARCH', 'true')
logger.info(f"USE_FULLTEXT_FIRST_FUSION_SEARCH={self.use_fulltext_first_fusion_search}")
"""
Database operations
@ -478,6 +512,43 @@ class OBConnection(DocStoreConnection):
return row[1]
raise Exception(f"Variable '{var_name}' not found.")
def _check_table_exists_cached(self, table_name: str) -> bool:
"""
Check table existence with cache to reduce INFORMATION_SCHEMA queries under high concurrency.
Only caches when table exists. Does not cache when table does not exist.
Thread-safe implementation: read operations are lock-free (GIL-protected),
write operations are protected by RLock to ensure cache consistency.
Args:
table_name: Table name
Returns:
Whether the table exists with all required indexes and columns
"""
if table_name in self._table_exists_cache:
return True
try:
if not self.client.check_table_exists(table_name):
return False
for column_name in index_columns:
if not self._index_exists(table_name, index_name_template % (table_name, column_name)):
return False
for fts_column in self.fulltext_search_columns:
column_name = fts_column.split("^")[0]
if not self._index_exists(table_name, fulltext_index_name_template % column_name):
return False
for column in [column_order_id, column_group_id]:
if not self._column_exist(table_name, column.name):
return False
except Exception as e:
raise Exception(f"OBConnection._check_table_exists_cached error: {str(e)}")
with self._table_exists_cache_lock:
if table_name not in self._table_exists_cache:
self._table_exists_cache.add(table_name)
return True
"""
Table operations
"""
@ -500,8 +571,7 @@ class OBConnection(DocStoreConnection):
process_func=lambda: self._add_index(indexName, column_name),
)
fts_columns = fts_columns_origin if self.search_original_content else fts_columns_tks
for fts_column in fts_columns:
for fts_column in self.fulltext_search_columns:
column_name = fts_column.split("^")[0]
_try_with_lock(
lock_name=f"ob_add_fulltext_idx_{indexName}_{column_name}",
@ -546,24 +616,7 @@ class OBConnection(DocStoreConnection):
raise Exception(f"OBConnection.deleteIndex error: {str(e)}")
def indexExist(self, indexName: str, knowledgebaseId: str = None) -> bool:
try:
if not self.client.check_table_exists(indexName):
return False
for column_name in index_columns:
if not self._index_exists(indexName, index_name_template % (indexName, column_name)):
return False
fts_columns = fts_columns_origin if self.search_original_content else fts_columns_tks
for fts_column in fts_columns:
column_name = fts_column.split("^")[0]
if not self._index_exists(indexName, fulltext_index_name_template % column_name):
return False
for column in [column_order_id, column_group_id]:
if not self._column_exist(indexName, column.name):
return False
except Exception as e:
raise Exception(f"OBConnection.indexExist error: {str(e)}")
return True
return self._check_table_exists_cached(indexName)
def _get_count(self, table_name: str, filter_list: list[str] = None) -> int:
where_clause = "WHERE " + " AND ".join(filter_list) if len(filter_list) > 0 else ""
@ -853,10 +906,8 @@ class OBConnection(DocStoreConnection):
fulltext_query = escape_string(fulltext_query.strip())
fulltext_topn = m.topn
fts_columns = fts_columns_origin if self.search_original_content else fts_columns_tks
# get fulltext match expression and weight values
for field in fts_columns:
for field in self.fulltext_search_columns:
parts = field.split("^")
column_name: str = parts[0]
column_weight: float = float(parts[1]) if (len(parts) > 1 and parts[1]) else 1.0
@ -885,7 +936,8 @@ class OBConnection(DocStoreConnection):
fulltext_search_score_expr = f"({' + '.join(f'{expr} * {fulltext_search_weight.get(col, 0)}' for col, expr in fulltext_search_expr.items())})"
if vector_data:
vector_search_expr = vector_search_template % (vector_column_name, vector_data)
vector_data_str = "[" + ",".join([str(np.float32(v)) for v in vector_data]) + "]"
vector_search_expr = vector_search_template % (vector_column_name, vector_data_str)
# use (1 - cosine_distance) as score, which should be [-1, 1]
# https://www.oceanbase.com/docs/common-oceanbase-database-standalone-1000000003577323
vector_search_score_expr = f"(1 - {vector_search_expr})"
@ -910,11 +962,15 @@ class OBConnection(DocStoreConnection):
if search_type in ["fusion", "fulltext", "vector"] and "_score" not in output_fields:
output_fields.append("_score")
group_results = kwargs.get("group_results", False)
if limit:
if vector_topn is not None:
limit = min(vector_topn, limit)
if fulltext_topn is not None:
limit = min(fulltext_topn, limit)
for index_name in indexNames:
if not self.client.check_table_exists(index_name):
if not self._check_table_exists_cached(index_name):
continue
fulltext_search_hint = f"/*+ UNION_MERGE({index_name} {' '.join(fulltext_search_idx_list)}) */" if self.use_fulltext_hint else ""
@ -922,29 +978,7 @@ class OBConnection(DocStoreConnection):
if search_type == "fusion":
# fusion search, usually for chat
num_candidates = vector_topn + fulltext_topn
if group_results:
count_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY relevance DESC"
f" LIMIT {num_candidates}"
f"),"
f" scored_results AS ("
f" SELECT *"
f" FROM fulltext_results"
f" WHERE {vector_search_filter}"
f"),"
f" group_results AS ("
f" SELECT *, ROW_NUMBER() OVER (PARTITION BY group_id) as rn"
f" FROM scored_results"
f")"
f" SELECT COUNT(*)"
f" FROM group_results"
f" WHERE rn = 1"
)
else:
if self.use_fulltext_first_fusion_search:
count_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
@ -955,6 +989,22 @@ class OBConnection(DocStoreConnection):
f")"
f" SELECT COUNT(*) FROM fulltext_results WHERE {vector_search_filter}"
)
else:
count_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} id FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY {fulltext_search_score_expr}"
f" LIMIT {fulltext_topn}"
f"),"
f"vector_results AS ("
f" SELECT id FROM {index_name}"
f" WHERE {filters_expr} AND {vector_search_filter}"
f" ORDER BY {vector_search_expr}"
f" APPROXIMATE LIMIT {vector_topn}"
f")"
f" SELECT COUNT(*) FROM fulltext_results f FULL OUTER JOIN vector_results v ON f.id = v.id"
)
logger.debug("OBConnection.search with count sql: %s", count_sql)
start_time = time.time()
@ -976,32 +1026,8 @@ class OBConnection(DocStoreConnection):
if total_count == 0:
continue
score_expr = f"(relevance * {1 - vector_similarity_weight} + {vector_search_score_expr} * {vector_similarity_weight} + {pagerank_score_expr})"
if group_results:
fusion_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY relevance DESC"
f" LIMIT {num_candidates}"
f"),"
f" scored_results AS ("
f" SELECT *, {score_expr} AS _score"
f" FROM fulltext_results"
f" WHERE {vector_search_filter}"
f"),"
f" group_results AS ("
f" SELECT *, ROW_NUMBER() OVER (PARTITION BY group_id ORDER BY _score DESC) as rn"
f" FROM scored_results"
f")"
f" SELECT {fields_expr}, _score"
f" FROM group_results"
f" WHERE rn = 1"
f" ORDER BY _score DESC"
f" LIMIT {offset}, {limit}"
)
else:
if self.use_fulltext_first_fusion_search:
score_expr = f"(relevance * {1 - vector_similarity_weight} + {vector_search_score_expr} * {vector_similarity_weight} + {pagerank_score_expr})"
fusion_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} *, {fulltext_search_score_expr} AS relevance"
@ -1016,6 +1042,38 @@ class OBConnection(DocStoreConnection):
f" ORDER BY _score DESC"
f" LIMIT {offset}, {limit}"
)
else:
pagerank_score_expr = f"(CAST(IFNULL(f.{PAGERANK_FLD}, 0) AS DECIMAL(10, 2)) / 100)"
score_expr = f"(f.relevance * {1 - vector_similarity_weight} + v.similarity * {vector_similarity_weight} + {pagerank_score_expr})"
fields_expr = ", ".join([f"t.{f} as {f}" for f in output_fields if f != "_score"])
fusion_sql = (
f"WITH fulltext_results AS ("
f" SELECT {fulltext_search_hint} id, pagerank_fea, {fulltext_search_score_expr} AS relevance"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {fulltext_search_filter}"
f" ORDER BY relevance DESC"
f" LIMIT {fulltext_topn}"
f"),"
f"vector_results AS ("
f" SELECT id, pagerank_fea, {vector_search_score_expr} AS similarity"
f" FROM {index_name}"
f" WHERE {filters_expr} AND {vector_search_filter}"
f" ORDER BY {vector_search_expr}"
f" APPROXIMATE LIMIT {vector_topn}"
f"),"
f"combined_results AS ("
f" SELECT COALESCE(f.id, v.id) AS id, {score_expr} AS score"
f" FROM fulltext_results f"
f" FULL OUTER JOIN vector_results v"
f" ON f.id = v.id"
f")"
f" SELECT {fields_expr}, c.score as _score"
f" FROM combined_results c"
f" JOIN {index_name} t"
f" ON c.id = t.id"
f" ORDER BY score DESC"
f" LIMIT {offset}, {limit}"
)
logger.debug("OBConnection.search with fusion sql: %s", fusion_sql)
start_time = time.time()
@ -1234,10 +1292,14 @@ class OBConnection(DocStoreConnection):
for row in rows:
result.chunks.append(self._row_to_entity(row, output_fields))
if result.total == 0:
result.total = len(result.chunks)
return result
def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
if not self.client.check_table_exists(indexName):
if not self._check_table_exists_cached(indexName):
return None
try:
@ -1336,7 +1398,7 @@ class OBConnection(DocStoreConnection):
return res
def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str) -> bool:
if not self.client.check_table_exists(indexName):
if not self._check_table_exists_cached(indexName):
return True
condition["kb_id"] = knowledgebaseId
@ -1387,7 +1449,7 @@ class OBConnection(DocStoreConnection):
return False
def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int:
if not self.client.check_table_exists(indexName):
if not self._check_table_exists_cached(indexName):
return 0
condition["kb_id"] = knowledgebaseId

View file

@ -41,7 +41,9 @@ def get_opendal_config():
scheme = opendal_config.get("scheme")
config_data = opendal_config.get("config", {})
kwargs = {"scheme": scheme, **config_data}
logging.info("Loaded OpenDAL configuration from yaml: %s", kwargs)
safe_log_keys=['scheme', 'host', 'port', 'database', 'table']
loggable_kwargs = {k: v for k, v in kwargs.items() if k in safe_log_keys}
logging.info("Loaded OpenDAL configuration(non sensitive): %s", loggable_kwargs)
return kwargs
except Exception as e:
logging.error("Failed to load OpenDAL configuration from yaml: %s", str(e))

View file

@ -20,6 +20,7 @@ import { CirclePlus, HelpCircle, Info } from 'lucide-react';
import { useId, useState, type FC, type FormEvent } from 'react';
import { useTranslation } from '../../hooks/use-translation';
import type { NewField, SchemaType } from '../../types/json-schema';
import { KeyInputProps } from './interface';
import SchemaTypeSelector from './schema-type-selector';
interface AddFieldButtonProps {
@ -27,9 +28,10 @@ interface AddFieldButtonProps {
variant?: 'primary' | 'secondary';
}
const AddFieldButton: FC<AddFieldButtonProps> = ({
const AddFieldButton: FC<AddFieldButtonProps & KeyInputProps> = ({
onAddField,
variant = 'primary',
pattern,
}) => {
const [dialogOpen, setDialogOpen] = useState(false);
const [fieldName, setFieldName] = useState('');
@ -120,6 +122,7 @@ const AddFieldButton: FC<AddFieldButtonProps> = ({
placeholder={t.fieldNamePlaceholder}
className="font-mono text-sm w-full"
required
searchValue={pattern}
/>
</div>

View file

@ -0,0 +1,9 @@
import React, { useContext } from 'react';
import { KeyInputProps } from './interface';
export const KeyInputContext = React.createContext<KeyInputProps>({});
export function useInputPattern() {
const x = useContext(KeyInputContext);
return x.pattern;
}

View file

@ -0,0 +1 @@
export type KeyInputProps = { pattern?: RegExp | string };

View file

@ -16,6 +16,7 @@ import {
withObjectSchema,
} from '../../types/json-schema';
import type { ValidationTreeNode } from '../../types/validation';
import { useInputPattern } from './context';
import TypeDropdown from './type-dropdown';
import TypeEditor from './type-editor';
@ -54,6 +55,8 @@ export const SchemaPropertyEditor: React.FC<SchemaPropertyEditorProps> = ({
'object' as SchemaType,
);
const pattern = useInputPattern();
// Update temp values when props change
useEffect(() => {
setTempName(name);
@ -123,6 +126,7 @@ export const SchemaPropertyEditor: React.FC<SchemaPropertyEditorProps> = ({
className="h-8 text-sm font-medium min-w-[120px] max-w-full z-10"
autoFocus
onFocus={(e) => e.target.select()}
searchValue={pattern}
/>
) : (
<button

View file

@ -8,6 +8,8 @@ import {
import type { JSONSchema, NewField } from '../../types/json-schema';
import { asObjectSchema, isBooleanSchema } from '../../types/json-schema';
import AddFieldButton from './add-field-button';
import { KeyInputContext } from './context';
import { KeyInputProps } from './interface';
import SchemaFieldList from './schema-field-list';
/** @public */
@ -17,9 +19,10 @@ export interface SchemaVisualEditorProps {
}
/** @public */
const SchemaVisualEditor: FC<SchemaVisualEditorProps> = ({
const SchemaVisualEditor: FC<SchemaVisualEditorProps & KeyInputProps> = ({
schema,
onChange,
pattern,
}) => {
const t = useTranslation();
// Handle adding a top-level field
@ -121,7 +124,7 @@ const SchemaVisualEditor: FC<SchemaVisualEditorProps> = ({
return (
<div className="p-4 h-full flex flex-col overflow-auto jsonjoy">
<div className="mb-6 shrink-0">
<AddFieldButton onAddField={handleAddField} />
<AddFieldButton onAddField={handleAddField} pattern={pattern} />
</div>
<div className="grow overflow-auto">
@ -131,12 +134,14 @@ const SchemaVisualEditor: FC<SchemaVisualEditorProps> = ({
<p className="text-sm">{t.visualEditorNoFieldsHint2}</p>
</div>
) : (
<SchemaFieldList
schema={schema}
onAddField={handleAddField}
onEditField={handleEditField}
onDeleteField={handleDeleteField}
/>
<KeyInputContext.Provider value={{ pattern }}>
<SchemaFieldList
schema={schema}
onAddField={handleAddField}
onEditField={handleEditField}
onDeleteField={handleDeleteField}
/>
</KeyInputContext.Provider>
)}
</div>
</div>

View file

@ -5,6 +5,7 @@ import { z } from 'zod';
import { SelectWithSearch } from '../originui/select-with-search';
import { RAGFlowFormItem } from '../ragflow-form';
import { MetadataFilterConditions } from './metadata-filter-conditions';
import { MetadataSemiAutoFields } from './metadata-semi-auto-fields';
type MetadataFilterProps = {
prefix?: string;
@ -25,6 +26,7 @@ export const MetadataFilterSchema = {
}),
)
.optional(),
semi_auto: z.array(z.string()).optional(),
})
.optional(),
};
@ -76,6 +78,12 @@ export function MetadataFilter({
canReference={canReference}
></MetadataFilterConditions>
)}
{hasKnowledge && metadata === DatasetMetadata.SemiAutomatic && (
<MetadataSemiAutoFields
kbIds={kbIds}
prefix={prefix}
></MetadataSemiAutoFields>
)}
</>
);
}

View file

@ -0,0 +1,99 @@
import { Button } from '@/components/ui/button';
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu';
import {
FormControl,
FormField,
FormItem,
FormLabel,
FormMessage,
} from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { useFetchKnowledgeMetadata } from '@/hooks/use-knowledge-request';
import { Plus, X } from 'lucide-react';
import { useCallback } from 'react';
import { useFieldArray, useFormContext } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
export function MetadataSemiAutoFields({
kbIds,
prefix = '',
}: {
kbIds: string[];
prefix?: string;
}) {
const { t } = useTranslation();
const form = useFormContext();
const name = prefix + 'meta_data_filter.semi_auto';
const metadata = useFetchKnowledgeMetadata(kbIds);
const { fields, remove, append } = useFieldArray({
name,
control: form.control,
});
const add = useCallback(
(key: string) => () => {
append(key);
},
[append],
);
return (
<section className="flex flex-col gap-2">
<div className="flex items-center justify-between">
<FormLabel>{t('chat.metadataKeys')}</FormLabel>
<DropdownMenu>
<DropdownMenuTrigger>
<Button variant={'ghost'} type="button">
<Plus />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent className="max-h-[300px] !overflow-y-auto scrollbar-auto">
{Object.keys(metadata.data).map((key, idx) => {
return (
<DropdownMenuItem key={idx} onClick={add(key)}>
{key}
</DropdownMenuItem>
);
})}
</DropdownMenuContent>
</DropdownMenu>
</div>
<div className="space-y-5">
{fields.map((field, index) => {
const typeField = `${name}.${index}`;
return (
<section key={field.id} className="flex gap-2">
<div className="w-full space-y-2">
<FormField
control={form.control}
name={typeField}
render={({ field }) => (
<FormItem className="flex-1 overflow-hidden">
<FormControl>
<Input
{...field}
placeholder={t('common.pleaseInput')}
readOnly
></Input>
</FormControl>
<FormMessage />
</FormItem>
)}
/>
</div>
<Button variant={'ghost'} onClick={() => remove(index)}>
<X className="text-text-sub-title-invert " />
</Button>
</section>
);
})}
</div>
</section>
);
}

View file

@ -193,3 +193,19 @@ export enum SwitchLogicOperator {
And = 'and',
Or = 'or',
}
export const WebhookAlgorithmList = [
'hs256',
'hs384',
'hs512',
'rs256',
'rs384',
'rs512',
'es256',
'es384',
'es512',
'ps256',
'ps384',
'ps512',
'none',
] as const;

View file

@ -36,5 +36,6 @@ export const EmptyConversationId = 'empty';
export enum DatasetMetadata {
Disabled = 'disabled',
Automatic = 'auto',
SemiAutomatic = 'semi_auto',
Manual = 'manual',
}

View file

@ -7,6 +7,16 @@ export interface ITestRetrievalRequestBody {
use_kg?: boolean;
highlight?: boolean;
kb_id?: string[];
meta_data_filter?: {
logic?: string;
method?: string;
manual?: Array<{
key: string;
op: string;
value: string;
}>;
semi_auto?: string[];
};
}
export interface IFetchKnowledgeListRequestBody {

View file

@ -737,11 +737,13 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
metadataTip:
'Metadata filtering is the process of using metadata attributes (such as tags, categories, or access permissions) to refine and control the retrieval of relevant information within a system.',
conditions: 'Conditions',
metadataKeys: 'Filterable items',
addCondition: 'Add condition',
meta: {
disabled: 'Disabled',
auto: 'Automatic',
manual: 'Manual',
semi_auto: 'Semi-automatic',
},
cancel: 'Cancel',
chatSetting: 'Chat setting',
@ -1961,6 +1963,37 @@ Important structured information may include: names, dates, locations, events, k
removeFirst: 'Remove first',
removeLast: 'Remove last',
},
webhook: {
name: 'Webhook',
methods: 'Methods',
contentTypes: 'Content types',
security: 'Security',
schema: 'Schema',
response: 'Response',
executionMode: 'Execution mode',
authMethods: 'Authentication Methods',
authType: 'Authentication Type',
limit: 'Request Limit',
per: 'Time Period',
maxBodySize: 'Maximum Body Size',
ipWhitelist: 'IP Whitelist',
tokenHeader: 'Token Header',
tokenValue: 'Token Value',
username: 'Username',
password: 'Password',
algorithm: 'Algorithm',
secret: 'Secret',
issuer: 'Issuer',
audience: 'Audience',
requiredClaims: 'Required Claims',
header: 'Header',
status: 'Status',
headersTemplate: 'Headers Template',
bodyTemplate: 'Body Template',
basic: 'Basic',
bearer: 'Bearer',
apiKey: 'Api Key',
},
},
llmTools: {
bad_calculator: {

View file

@ -673,11 +673,13 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
metadataTip:
'元数据过滤是使用元数据属性(例如标签、类别或访问权限)来优化和控制系统内相关信息检索的过程。',
conditions: '条件',
metadataKeys: '可选过滤项',
addCondition: '增加条件',
meta: {
disabled: '禁用',
auto: '自动',
manual: '手动',
semi_auto: '半自动',
},
cancel: '取消',
chatSetting: '聊天设置',
@ -1755,6 +1757,37 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
removeFirst: '移除第一个',
removeLast: '移除最后一个',
},
webhook: {
name: '网络钩子',
methods: '方法',
contentTypes: '内容类型',
security: '安全配置',
schema: '模式',
response: '响应',
executionMode: '执行模式',
authMethods: '认证方法',
authType: '认证类型',
limit: '请求限制',
per: '时间周期',
maxBodySize: '最大主体大小',
ipWhitelist: 'IP白名单',
tokenHeader: '令牌头部',
tokenValue: '令牌值',
username: '用户名',
password: '密码',
algorithm: '算法',
secret: '密钥',
issuer: '签发者',
audience: '受众',
requiredClaims: '必需声明',
header: '头部',
status: '状态',
headersTemplate: '头部模板',
bodyTemplate: '主体模板',
basic: '基础认证',
bearer: '承载令牌',
apiKey: 'API密钥',
},
},
footer: {
profile: 'All rights reserved @ React',

View file

@ -1,32 +1,14 @@
import i18n from '@/locales/config';
import { BeginId } from '@/pages/agent/constant';
import { ReactNode } from 'react';
const prefix = BeginId + '@';
interface VariableDisplayProps {
content: string;
getLabel?: (value?: string) => string | ReactNode;
}
// This component mimics the VariableNode's decorate function from PromptEditor
function VariableNodeDisplay({
value,
label,
}: {
value: string;
label: ReactNode;
}) {
function VariableNodeDisplay({ label }: { label: ReactNode }) {
let content: ReactNode = <span className="text-accent-primary">{label}</span>;
if (value.startsWith(prefix)) {
content = (
<div>
<span>{i18n.t(`flow.begin`)}</span> / {content}
</div>
);
}
return <div className="inline-flex items-center mr-1">{content}</div>;
}
@ -63,11 +45,7 @@ export function VariableDisplay({ content, getLabel }: VariableDisplayProps) {
if (label && label !== variableValue) {
// If we found a valid label, render as variable node
elements.push(
<VariableNodeDisplay
key={`variable-${index}`}
value={variableValue}
label={label}
/>,
<VariableNodeDisplay key={`variable-${index}`} label={label} />,
);
} else {
// If no label found, keep as original text

View file

@ -25,6 +25,7 @@ export * from './pipeline';
export enum AgentDialogueMode {
Conversational = 'conversational',
Task = 'task',
Webhook = 'Webhook',
}
import { ModelVariableType } from '@/constants/knowledge';
@ -930,3 +931,37 @@ export enum AgentVariableType {
Begin = 'begin',
Conversation = 'conversation',
}
export enum WebhookMethod {
Post = 'POST',
Get = 'GET',
Put = 'PUT',
Patch = 'PATCH',
Delete = 'DELETE',
Head = 'HEAD',
}
export enum WebhookContentType {
ApplicationJson = 'application/json',
MultipartFormData = 'multipart/form-data',
ApplicationXWwwFormUrlencoded = 'application/x-www-form-urlencoded',
TextPlain = 'text/plain',
ApplicationOctetStream = 'application/octet-stream',
}
export enum WebhookExecutionMode {
Immediately = 'Immediately',
Streaming = 'Streaming',
}
export enum WebhookSecurityAuthType {
None = 'none',
Token = 'token',
Basic = 'basic',
Jwt = 'jwt',
Hmac = 'hmac',
}
export const RateLimitPerList = ['minute', 'hour', 'day'];
export const WebhookMaxBodySize = ['10MB', '50MB', '100MB', '1000MB'];

View file

@ -42,9 +42,9 @@ import { FormWrapper } from '../components/form-wrapper';
import { Output } from '../components/output';
import { PromptEditor } from '../components/prompt-editor';
import { QueryVariable } from '../components/query-variable';
import { SchemaDialog } from '../components/schema-dialog';
import { SchemaPanel } from '../components/schema-panel';
import { AgentTools, Agents } from './agent-tools';
import { StructuredOutputDialog } from './structured-output-dialog';
import { StructuredOutputPanel } from './structured-output-panel';
import { useBuildPromptExtraPromptOptions } from './use-build-prompt-options';
import {
useHandleShowStructuredOutput,
@ -327,19 +327,17 @@ function AgentForm({ node }: INextOperatorForm) {
</Button>
</div>
<StructuredOutputPanel
value={structuredOutput}
></StructuredOutputPanel>
<SchemaPanel value={structuredOutput}></SchemaPanel>
</section>
)}
</FormWrapper>
</Form>
{structuredOutputDialogVisible && (
<StructuredOutputDialog
<SchemaDialog
hideModal={hideStructuredOutputDialog}
onOk={handleStructuredOutputDialogOk}
initialValues={structuredOutput}
></StructuredOutputDialog>
></SchemaDialog>
)}
</>
);

View file

@ -12,6 +12,7 @@ import { RAGFlowSelect } from '@/components/ui/select';
import { Switch } from '@/components/ui/switch';
import { Textarea } from '@/components/ui/textarea';
import { FormTooltip } from '@/components/ui/tooltip';
import { WebhookAlgorithmList } from '@/constants/agent';
import { zodResolver } from '@hookform/resolvers/zod';
import { t } from 'i18next';
import { Plus } from 'lucide-react';
@ -24,37 +25,71 @@ import { INextOperatorForm } from '../../interface';
import { ParameterDialog } from './parameter-dialog';
import { QueryTable } from './query-table';
import { useEditQueryRecord } from './use-edit-query';
import { useHandleModeChange } from './use-handle-mode-change';
import { useValues } from './use-values';
import { useWatchFormChange } from './use-watch-change';
import { WebHook } from './webhook';
const ModeOptions = [
{ value: AgentDialogueMode.Conversational, label: t('flow.conversational') },
{ value: AgentDialogueMode.Task, label: t('flow.task') },
{ value: AgentDialogueMode.Webhook, label: t('flow.webhook.name') },
];
const FormSchema = z.object({
enablePrologue: z.boolean().optional(),
prologue: z.string().trim().optional(),
mode: z.string(),
inputs: z
.array(
z.object({
key: z.string(),
type: z.string(),
value: z.string(),
optional: z.boolean(),
name: z.string(),
options: z.array(z.union([z.number(), z.string(), z.boolean()])),
}),
)
.optional(),
methods: z.string().optional(),
content_types: z.string().optional(),
security: z
.object({
auth_type: z.string(),
ip_whitelist: z.array(z.object({ value: z.string() })),
rate_limit: z.object({
limit: z.number(),
per: z.string().optional(),
}),
max_body_size: z.string(),
jwt: z
.object({
algorithm: z.string().default(WebhookAlgorithmList[0]).optional(),
})
.optional(),
})
.optional(),
schema: z.record(z.any()).optional(),
response: z
.object({
status: z.number(),
headers_template: z.array(
z.object({ key: z.string(), value: z.string() }),
),
body_template: z.array(z.object({ key: z.string(), value: z.string() })),
})
.optional(),
execution_mode: z.string().optional(),
});
export type BeginFormSchemaType = z.infer<typeof FormSchema>;
function BeginForm({ node }: INextOperatorForm) {
const { t } = useTranslation();
const values = useValues(node);
const FormSchema = z.object({
enablePrologue: z.boolean().optional(),
prologue: z.string().trim().optional(),
mode: z.string(),
inputs: z
.array(
z.object({
key: z.string(),
type: z.string(),
value: z.string(),
optional: z.boolean(),
name: z.string(),
options: z.array(z.union([z.number(), z.string(), z.boolean()])),
}),
)
.optional(),
});
const form = useForm({
defaultValues: values,
resolver: zodResolver(FormSchema),
@ -72,6 +107,8 @@ function BeginForm({ node }: INextOperatorForm) {
const previousModeRef = useRef(mode);
const { handleModeChange } = useHandleModeChange(form);
useEffect(() => {
if (
previousModeRef.current === AgentDialogueMode.Task &&
@ -111,6 +148,10 @@ function BeginForm({ node }: INextOperatorForm) {
placeholder={t('common.pleaseSelect')}
options={ModeOptions}
{...field}
onChange={(val) => {
handleModeChange(val);
field.onChange(val);
}}
></RAGFlowSelect>
</FormControl>
<FormMessage />
@ -158,44 +199,49 @@ function BeginForm({ node }: INextOperatorForm) {
)}
/>
)}
{/* Create a hidden field to make Form instance record this */}
<FormField
control={form.control}
name={'inputs'}
render={() => <div></div>}
/>
<Collapse
title={
<div>
{t('flow.input')}
<FormTooltip tooltip={t('flow.beginInputTip')}></FormTooltip>
</div>
}
rightContent={
<Button
variant={'ghost'}
onClick={(e) => {
e.preventDefault();
showModal();
}}
{mode === AgentDialogueMode.Webhook && <WebHook></WebHook>}
{mode !== AgentDialogueMode.Webhook && (
<>
{/* Create a hidden field to make Form instance record this */}
<FormField
control={form.control}
name={'inputs'}
render={() => <div></div>}
/>
<Collapse
title={
<div>
{t('flow.input')}
<FormTooltip tooltip={t('flow.beginInputTip')}></FormTooltip>
</div>
}
rightContent={
<Button
variant={'ghost'}
onClick={(e) => {
e.preventDefault();
showModal();
}}
>
<Plus />
</Button>
}
>
<Plus />
</Button>
}
>
<QueryTable
data={inputs}
showModal={showModal}
deleteRecord={handleDeleteRecord}
></QueryTable>
</Collapse>
{visible && (
<ParameterDialog
hideModal={hideModal}
initialValue={currentRecord}
otherThanCurrentQuery={otherThanCurrentQuery}
submit={ok}
></ParameterDialog>
<QueryTable
data={inputs}
showModal={showModal}
deleteRecord={handleDeleteRecord}
></QueryTable>
</Collapse>
{visible && (
<ParameterDialog
hideModal={hideModal}
initialValue={currentRecord}
otherThanCurrentQuery={otherThanCurrentQuery}
submit={ok}
></ParameterDialog>
)}
</>
)}
</Form>
</section>

View file

@ -0,0 +1,76 @@
import { useCallback } from 'react';
import { UseFormReturn } from 'react-hook-form';
import {
AgentDialogueMode,
RateLimitPerList,
WebhookExecutionMode,
WebhookMaxBodySize,
WebhookSecurityAuthType,
} from '../../constant';
// const WebhookSchema = {
// query: {
// type: 'object',
// required: [],
// properties: {
// // debug: { type: 'boolean' },
// // event: { type: 'string' },
// },
// },
// headers: {
// type: 'object',
// required: [],
// properties: {
// // 'X-Trace-ID': { type: 'string' },
// },
// },
// body: {
// type: 'object',
// required: [],
// properties: {
// id: { type: 'string' },
// payload: { type: 'object' },
// },
// },
// };
const schema = {
properties: {
query: {
type: 'object',
description: '',
},
headers: {
type: 'object',
description: '',
},
body: {
type: 'object',
description: '',
},
},
};
const initialFormValuesMap = {
schema: schema,
'security.auth_type': WebhookSecurityAuthType.Basic,
'security.rate_limit.per': RateLimitPerList[0],
'security.max_body_size': WebhookMaxBodySize[0],
execution_mode: WebhookExecutionMode.Immediately,
};
export function useHandleModeChange(form: UseFormReturn<any>) {
const handleModeChange = useCallback(
(mode: AgentDialogueMode) => {
if (mode === AgentDialogueMode.Webhook) {
Object.entries(initialFormValuesMap).forEach(([key, value]) => {
form.setValue(key, value, { shouldDirty: true });
});
}
},
[form],
);
return { handleModeChange };
}

View file

@ -0,0 +1,28 @@
import { JSONSchema } from '@/components/jsonjoy-builder';
import { useSetModalState } from '@/hooks/common-hooks';
import { useCallback } from 'react';
import { UseFormReturn } from 'react-hook-form';
export function useShowSchemaDialog(form: UseFormReturn<any>) {
const {
visible: schemaDialogVisible,
showModal: showSchemaDialog,
hideModal: hideSchemaDialog,
} = useSetModalState();
const handleSchemaDialogOk = useCallback(
(values: JSONSchema) => {
// Sync data to canvas
form.setValue('schema', values);
hideSchemaDialog();
},
[form, hideSchemaDialog],
);
return {
schemaDialogVisible,
showSchemaDialog,
hideSchemaDialog,
handleSchemaDialogOk,
};
}

View file

@ -1,6 +1,7 @@
import { omit } from 'lodash';
import { useEffect } from 'react';
import { UseFormReturn, useWatch } from 'react-hook-form';
import { AgentDialogueMode } from '../../constant';
import { BeginQuery } from '../../interface';
import useGraphStore from '../../store';
@ -20,9 +21,21 @@ export function useWatchFormChange(id?: string, form?: UseFormReturn) {
if (id) {
values = form?.getValues() || {};
let outputs: Record<string, any> = {};
// For webhook mode, use schema properties as direct outputs
// Each property (body, headers, query) should be able to show secondary menu
if (
values.mode === AgentDialogueMode.Webhook &&
values.schema?.properties
) {
outputs = { ...values.schema.properties };
}
const nextValues = {
...values,
inputs: transferInputsArrayToObject(values.inputs),
outputs,
};
updateNodeForm(id, nextValues);

View file

@ -0,0 +1,139 @@
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Input } from '@/components/ui/input';
import { WebhookAlgorithmList } from '@/constants/agent';
import { WebhookSecurityAuthType } from '@/pages/agent/constant';
import { buildOptions } from '@/utils/form';
import { useCallback } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
const AlgorithmOptions = buildOptions(WebhookAlgorithmList);
const RequiredClaimsOptions = buildOptions(['exp', 'sub']);
export function Auth() {
const { t } = useTranslation();
const form = useFormContext();
const authType = useWatch({
name: 'security.auth_type',
control: form.control,
});
const renderTokenAuth = useCallback(
() => (
<>
<RAGFlowFormItem
name="security.token.token_header"
label={t('flow.webhook.tokenHeader')}
>
<Input></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.token.token_value"
label={t('flow.webhook.tokenValue')}
>
<Input></Input>
</RAGFlowFormItem>
</>
),
[t],
);
const renderBasicAuth = useCallback(
() => (
<>
<RAGFlowFormItem
name="security.basic_auth.username"
label={t('flow.webhook.username')}
>
<Input></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.basic_auth.password"
label={t('flow.webhook.password')}
>
<Input></Input>
</RAGFlowFormItem>
</>
),
[t],
);
const renderJwtAuth = useCallback(
() => (
<>
<RAGFlowFormItem
name="security.jwt.algorithm"
label={t('flow.webhook.algorithm')}
>
<SelectWithSearch options={AlgorithmOptions}></SelectWithSearch>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.jwt.secret"
label={t('flow.webhook.secret')}
>
<Input></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.jwt.issuer"
label={t('flow.webhook.issuer')}
>
<Input></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.jwt.audience"
label={t('flow.webhook.audience')}
>
<Input></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.jwt.required_claims"
label={t('flow.webhook.requiredClaims')}
>
<SelectWithSearch options={RequiredClaimsOptions}></SelectWithSearch>
</RAGFlowFormItem>
</>
),
[t],
);
const renderHmacAuth = useCallback(
() => (
<>
<RAGFlowFormItem
name="security.hmac.header"
label={t('flow.webhook.header')}
>
<Input></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.hmac.secret"
label={t('flow.webhook.secret')}
>
<Input></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.hmac.algorithm"
label={t('flow.webhook.algorithm')}
>
<SelectWithSearch options={AlgorithmOptions}></SelectWithSearch>
</RAGFlowFormItem>
</>
),
[t],
);
const AuthMap = {
[WebhookSecurityAuthType.Token]: renderTokenAuth,
[WebhookSecurityAuthType.Basic]: renderBasicAuth,
[WebhookSecurityAuthType.Jwt]: renderJwtAuth,
[WebhookSecurityAuthType.Hmac]: renderHmacAuth,
[WebhookSecurityAuthType.None]: () => null,
};
return AuthMap[
(authType ?? WebhookSecurityAuthType.None) as WebhookSecurityAuthType
]();
}

View file

@ -0,0 +1,213 @@
import { BoolSegmented } from '@/components/bool-segmented';
import { KeyInput } from '@/components/key-input';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { useIsDarkTheme } from '@/components/theme-provider';
import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input';
import { Separator } from '@/components/ui/separator';
import { Textarea } from '@/components/ui/textarea';
import { Editor, loader } from '@monaco-editor/react';
import { X } from 'lucide-react';
import { ReactNode, useCallback } from 'react';
import { useFieldArray, useFormContext } from 'react-hook-form';
import { InputMode, TypesWithArray } from '../../../constant';
import {
InputModeOptions,
buildConversationVariableSelectOptions,
} from '../../../utils';
import { DynamicFormHeader } from '../../components/dynamic-fom-header';
import { QueryVariable } from '../../components/query-variable';
loader.config({ paths: { vs: '/vs' } });
type SelectKeysProps = {
name: string;
label: ReactNode;
tooltip?: string;
keyField?: string;
valueField?: string;
operatorField?: string;
nodeId?: string;
};
const VariableTypeOptions = buildConversationVariableSelectOptions();
const modeField = 'input_mode';
const ConstantValueMap = {
[TypesWithArray.Boolean]: true,
[TypesWithArray.Number]: 0,
[TypesWithArray.String]: '',
[TypesWithArray.ArrayBoolean]: '[]',
[TypesWithArray.ArrayNumber]: '[]',
[TypesWithArray.ArrayString]: '[]',
[TypesWithArray.ArrayObject]: '[]',
[TypesWithArray.Object]: '{}',
};
export function DynamicResponse({
name,
label,
tooltip,
keyField = 'key',
valueField = 'value',
operatorField = 'type',
}: SelectKeysProps) {
const form = useFormContext();
const isDarkTheme = useIsDarkTheme();
const { fields, remove, append } = useFieldArray({
name: name,
control: form.control,
});
const initializeValue = useCallback(
(mode: string, variableType: string, valueFieldAlias: string) => {
if (mode === InputMode.Variable) {
form.setValue(valueFieldAlias, '', { shouldDirty: true });
} else {
const val = ConstantValueMap[variableType as TypesWithArray];
form.setValue(valueFieldAlias, val, { shouldDirty: true });
}
},
[form],
);
const handleModeChange = useCallback(
(mode: string, valueFieldAlias: string, operatorFieldAlias: string) => {
const variableType = form.getValues(operatorFieldAlias);
initializeValue(mode, variableType, valueFieldAlias);
},
[form, initializeValue],
);
const handleVariableTypeChange = useCallback(
(variableType: string, valueFieldAlias: string, modeFieldAlias: string) => {
const mode = form.getValues(modeFieldAlias);
initializeValue(mode, variableType, valueFieldAlias);
},
[form, initializeValue],
);
const renderParameter = useCallback(
(operatorFieldName: string, modeFieldName: string) => {
const mode = form.getValues(modeFieldName);
const logicalOperator = form.getValues(operatorFieldName);
if (mode === InputMode.Constant) {
if (logicalOperator === TypesWithArray.Boolean) {
return <BoolSegmented></BoolSegmented>;
}
if (logicalOperator === TypesWithArray.Number) {
return <Input className="w-full" type="number"></Input>;
}
if (logicalOperator === TypesWithArray.String) {
return <Textarea></Textarea>;
}
return (
<Editor
height={300}
theme={isDarkTheme ? 'vs-dark' : 'vs'}
language={'json'}
options={{
minimap: { enabled: false },
automaticLayout: true,
}}
/>
);
}
return (
<QueryVariable
types={[logicalOperator]}
hideLabel
pureQuery
></QueryVariable>
);
},
[form, isDarkTheme],
);
return (
<section className="space-y-2">
<DynamicFormHeader
label={label}
tooltip={tooltip}
onClick={() =>
append({
[keyField]: '',
[valueField]: '',
[modeField]: InputMode.Constant,
[operatorField]: TypesWithArray.String,
})
}
></DynamicFormHeader>
<div className="space-y-5">
{fields.map((field, index) => {
const keyFieldAlias = `${name}.${index}.${keyField}`;
const valueFieldAlias = `${name}.${index}.${valueField}`;
const operatorFieldAlias = `${name}.${index}.${operatorField}`;
const modeFieldAlias = `${name}.${index}.${modeField}`;
return (
<section key={field.id} className="flex gap-2">
<div className="flex-1 space-y-3 min-w-0">
<div className="flex items-center">
<RAGFlowFormItem name={keyFieldAlias} className="flex-1 ">
<KeyInput></KeyInput>
</RAGFlowFormItem>
<Separator className="w-2" />
<RAGFlowFormItem name={operatorFieldAlias} className="flex-1">
{(field) => (
<SelectWithSearch
value={field.value}
onChange={(val) => {
handleVariableTypeChange(
val,
valueFieldAlias,
modeFieldAlias,
);
field.onChange(val);
}}
options={VariableTypeOptions}
></SelectWithSearch>
)}
</RAGFlowFormItem>
<Separator className="w-2" />
<RAGFlowFormItem name={modeFieldAlias} className="flex-1">
{(field) => (
<SelectWithSearch
value={field.value}
onChange={(val) => {
handleModeChange(
val,
valueFieldAlias,
operatorFieldAlias,
);
field.onChange(val);
}}
options={InputModeOptions}
></SelectWithSearch>
)}
</RAGFlowFormItem>
</div>
<RAGFlowFormItem name={valueFieldAlias} className="w-full">
{renderParameter(operatorFieldAlias, modeFieldAlias)}
</RAGFlowFormItem>
</div>
<Button variant={'ghost'} onClick={() => remove(index)}>
<X />
</Button>
</section>
);
})}
</div>
</section>
);
}

View file

@ -0,0 +1,134 @@
import { Collapse } from '@/components/collapse';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input';
import { Separator } from '@/components/ui/separator';
import { Textarea } from '@/components/ui/textarea';
import { buildOptions } from '@/utils/form';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import {
RateLimitPerList,
WebhookContentType,
WebhookExecutionMode,
WebhookMaxBodySize,
WebhookMethod,
WebhookSecurityAuthType,
} from '../../../constant';
import { DynamicStringForm } from '../../components/dynamic-string-form';
import { SchemaDialog } from '../../components/schema-dialog';
import { SchemaPanel } from '../../components/schema-panel';
import { useShowSchemaDialog } from '../use-show-schema-dialog';
import { Auth } from './auth';
import { WebhookResponse } from './response';
const RateLimitPerOptions = buildOptions(RateLimitPerList);
export function WebHook() {
const { t } = useTranslation();
const form = useFormContext();
const executionMode = useWatch({
control: form.control,
name: 'execution_mode',
});
const {
showSchemaDialog,
schemaDialogVisible,
hideSchemaDialog,
handleSchemaDialogOk,
} = useShowSchemaDialog(form);
const schema = form.getValues('schema');
return (
<>
<RAGFlowFormItem name="methods" label={t('flow.webhook.methods')}>
<SelectWithSearch
options={buildOptions(WebhookMethod)}
></SelectWithSearch>
</RAGFlowFormItem>
<RAGFlowFormItem
name="content_types"
label={t('flow.webhook.contentTypes')}
>
<SelectWithSearch
options={buildOptions(WebhookContentType)}
></SelectWithSearch>
</RAGFlowFormItem>
<Collapse title={<div>Security</div>}>
<section className="space-y-4">
<RAGFlowFormItem
name="security.auth_type"
label={t('flow.webhook.authType')}
>
<SelectWithSearch
options={buildOptions(WebhookSecurityAuthType)}
></SelectWithSearch>
</RAGFlowFormItem>
<Auth></Auth>
<RAGFlowFormItem
name="security.rate_limit.limit"
label={t('flow.webhook.limit')}
>
<Input type="number"></Input>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.rate_limit.per"
label={t('flow.webhook.per')}
>
<SelectWithSearch options={RateLimitPerOptions}></SelectWithSearch>
</RAGFlowFormItem>
<RAGFlowFormItem
name="security.max_body_size"
label={t('flow.webhook.maxBodySize')}
>
<SelectWithSearch
options={buildOptions(WebhookMaxBodySize)}
></SelectWithSearch>
</RAGFlowFormItem>
<DynamicStringForm
name="security.ip_whitelist"
label={t('flow.webhook.ipWhitelist')}
></DynamicStringForm>
</section>
</Collapse>
<RAGFlowFormItem
name="schema"
label={t('flow.webhook.schema')}
className="hidden"
>
<Textarea></Textarea>
</RAGFlowFormItem>
<RAGFlowFormItem
name="execution_mode"
label={t('flow.webhook.executionMode')}
>
<SelectWithSearch
options={buildOptions(WebhookExecutionMode)}
></SelectWithSearch>
</RAGFlowFormItem>
{executionMode === WebhookExecutionMode.Immediately && (
<WebhookResponse></WebhookResponse>
)}
<Separator></Separator>
<section className="flex justify-between items-center">
Schema
<Button variant={'ghost'} onClick={showSchemaDialog}>
{t('flow.structuredOutput.configuration')}
</Button>
</section>
<SchemaPanel value={schema}></SchemaPanel>
{schemaDialogVisible && (
<SchemaDialog
initialValues={schema}
hideModal={hideSchemaDialog}
onOk={handleSchemaDialogOk}
pattern={''}
></SchemaDialog>
)}
</>
);
}

View file

@ -0,0 +1,30 @@
import { Collapse } from '@/components/collapse';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Input } from '@/components/ui/input';
import { useTranslation } from 'react-i18next';
import { DynamicResponse } from './dynamic-response';
export function WebhookResponse() {
const { t } = useTranslation();
return (
<Collapse title={<div>Response</div>}>
<section className="space-y-4">
<RAGFlowFormItem
name={'response.status'}
label={t('flow.webhook.status')}
>
<Input type="number"></Input>
</RAGFlowFormItem>
<DynamicResponse
name="response.headers_template"
label={t('flow.webhook.headersTemplate')}
></DynamicResponse>
<DynamicResponse
name="response.body_template"
label={t('flow.webhook.bodyTemplate')}
></DynamicResponse>
</section>
</Collapse>
);
}

View file

@ -0,0 +1,46 @@
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input';
import { Trash2 } from 'lucide-react';
import { useFieldArray, useFormContext } from 'react-hook-form';
import { DynamicFormHeader, FormListHeaderProps } from './dynamic-fom-header';
type DynamicStringFormProps = { name: string } & FormListHeaderProps;
export function DynamicStringForm({ name, label }: DynamicStringFormProps) {
const form = useFormContext();
const { fields, append, remove } = useFieldArray({
name: name,
control: form.control,
});
return (
<section>
<DynamicFormHeader
label={label}
onClick={() => append({ value: '' })}
></DynamicFormHeader>
<div className="space-y-4">
{fields.map((field, index) => (
<div key={field.id} className="flex items-center gap-2">
<RAGFlowFormItem
name={`${name}.${index}.value`}
label="delimiter"
labelClassName="!hidden"
className="flex-1 !m-0"
>
<Input className="!m-0"></Input>
</RAGFlowFormItem>
<Button
type="button"
variant={'ghost'}
onClick={() => remove(index)}
>
<Trash2 />
</Button>
</div>
))}
</div>
</section>
);
}

View file

@ -3,6 +3,7 @@ import {
JsonSchemaVisualizer,
SchemaVisualEditor,
} from '@/components/jsonjoy-builder';
import { KeyInputProps } from '@/components/jsonjoy-builder/components/schema-editor/interface';
import { Button } from '@/components/ui/button';
import {
Dialog,
@ -16,11 +17,12 @@ import { IModalProps } from '@/interfaces/common';
import { useCallback, useState } from 'react';
import { useTranslation } from 'react-i18next';
export function StructuredOutputDialog({
export function SchemaDialog({
hideModal,
onOk,
initialValues,
}: IModalProps<any>) {
pattern,
}: IModalProps<any> & KeyInputProps) {
const { t } = useTranslation();
const [schema, setSchema] = useState<JSONSchema>(initialValues);
@ -36,7 +38,11 @@ export function StructuredOutputDialog({
</DialogHeader>
<section className="flex overflow-auto">
<div className="flex-1">
<SchemaVisualEditor schema={schema} onChange={setSchema} />
<SchemaVisualEditor
schema={schema}
onChange={setSchema}
pattern={pattern}
/>
</div>
<div className="flex-1">
<JsonSchemaVisualizer schema={schema} onChange={setSchema} />

View file

@ -1,6 +1,6 @@
import { JSONSchema, JsonSchemaVisualizer } from '@/components/jsonjoy-builder';
export function StructuredOutputPanel({ value }: { value: JSONSchema }) {
export function SchemaPanel({ value }: { value: JSONSchema }) {
return (
<section className="h-48">
<JsonSchemaVisualizer

View file

@ -2,7 +2,9 @@ import { getStructuredDatatype } from '@/utils/canvas-util';
import { get, isPlainObject } from 'lodash';
import { ReactNode, useCallback } from 'react';
import {
AgentDialogueMode,
AgentStructuredOutputField,
BeginId,
JsonSchemaDataType,
Operator,
} from '../constant';
@ -16,36 +18,94 @@ function getNodeId(value: string) {
}
export function useShowSecondaryMenu() {
const { getOperatorTypeFromId } = useGraphStore((state) => state);
const { getOperatorTypeFromId, getNode } = useGraphStore((state) => state);
const showSecondaryMenu = useCallback(
(value: string, outputLabel: string) => {
const nodeId = getNodeId(value);
return (
getOperatorTypeFromId(nodeId) === Operator.Agent &&
const operatorType = getOperatorTypeFromId(nodeId);
// For Agent nodes, show secondary menu for 'structured' field
if (
operatorType === Operator.Agent &&
outputLabel === AgentStructuredOutputField
);
) {
return true;
}
// For Begin nodes in webhook mode, show secondary menu for schema properties (body, headers, query, etc.)
if (operatorType === Operator.Begin) {
const node = getNode(nodeId);
const mode = get(node, 'data.form.mode');
if (mode === AgentDialogueMode.Webhook) {
// Check if this output field is from the schema
const outputs = get(node, 'data.form.outputs', {});
const outputField = outputs[outputLabel];
// Show secondary menu if the field is an object or has properties
return (
outputField &&
(outputField.type === 'object' ||
(outputField.properties &&
Object.keys(outputField.properties).length > 0))
);
}
}
return false;
},
[getOperatorTypeFromId],
[getOperatorTypeFromId, getNode],
);
return showSecondaryMenu;
}
function useGetBeginOutputsOrSchema() {
const { getNode } = useGraphStore((state) => state);
const getBeginOutputs = useCallback(() => {
const node = getNode(BeginId);
const outputs = get(node, 'data.form.outputs', {});
return outputs;
}, [getNode]);
const getBeginSchema = useCallback(() => {
const node = getNode(BeginId);
const outputs = get(node, 'data.form.schema', {});
return outputs;
}, [getNode]);
return { getBeginOutputs, getBeginSchema };
}
export function useGetStructuredOutputByValue() {
const { getNode } = useGraphStore((state) => state);
const { getNode, getOperatorTypeFromId } = useGraphStore((state) => state);
const { getBeginOutputs } = useGetBeginOutputsOrSchema();
const getStructuredOutput = useCallback(
(value: string) => {
const node = getNode(getNodeId(value));
const structuredOutput = get(
node,
`data.form.outputs.${AgentStructuredOutputField}`,
);
const nodeId = getNodeId(value);
const node = getNode(nodeId);
const operatorType = getOperatorTypeFromId(nodeId);
const fields = splitValue(value);
const outputLabel = fields.at(1);
let structuredOutput;
if (operatorType === Operator.Agent) {
structuredOutput = get(
node,
`data.form.outputs.${AgentStructuredOutputField}`,
);
} else if (operatorType === Operator.Begin) {
// For Begin nodes in webhook mode, get the specific schema property
const outputs = getBeginOutputs();
if (outputLabel) {
structuredOutput = outputs[outputLabel];
}
}
return structuredOutput;
},
[getNode],
[getBeginOutputs, getNode, getOperatorTypeFromId],
);
return getStructuredOutput;
@ -66,13 +126,14 @@ export function useFindAgentStructuredOutputLabel() {
icon?: ReactNode;
}>,
) => {
// agent structured output
const fields = splitValue(value);
const operatorType = getOperatorTypeFromId(fields.at(0));
// Handle Agent structured fields
if (
getOperatorTypeFromId(fields.at(0)) === Operator.Agent &&
operatorType === Operator.Agent &&
fields.at(1)?.startsWith(AgentStructuredOutputField)
) {
// is agent structured output
const agentOption = options.find((x) => value.includes(x.value));
const jsonSchemaFields = fields
.at(1)
@ -84,6 +145,19 @@ export function useFindAgentStructuredOutputLabel() {
value: value,
};
}
// Handle Begin webhook fields
if (operatorType === Operator.Begin && fields.at(1)) {
const fieldOption = options
.filter((x) => x.parentLabel === BeginId)
.find((x) => value.startsWith(x.value));
return {
...fieldOption,
label: fields.at(1),
value: value,
};
}
},
[getOperatorTypeFromId],
);
@ -94,6 +168,7 @@ export function useFindAgentStructuredOutputLabel() {
export function useFindAgentStructuredOutputTypeByValue() {
const { getOperatorTypeFromId } = useGraphStore((state) => state);
const filterStructuredOutput = useGetStructuredOutputByValue();
const { getBeginSchema } = useGetBeginOutputsOrSchema();
const findTypeByValue = useCallback(
(
@ -136,10 +211,12 @@ export function useFindAgentStructuredOutputTypeByValue() {
}
const fields = splitValue(value);
const nodeId = fields.at(0);
const operatorType = getOperatorTypeFromId(nodeId);
const jsonSchema = filterStructuredOutput(value);
// Handle Agent structured fields
if (
getOperatorTypeFromId(nodeId) === Operator.Agent &&
operatorType === Operator.Agent &&
fields.at(1)?.startsWith(AgentStructuredOutputField)
) {
const jsonSchemaFields = fields
@ -151,13 +228,32 @@ export function useFindAgentStructuredOutputTypeByValue() {
return type;
}
}
// Handle Begin webhook fields (body, headers, query, etc.)
if (operatorType === Operator.Begin) {
const outputLabel = fields.at(1);
const schema = getBeginSchema();
if (outputLabel && schema) {
const jsonSchemaFields = fields.at(1);
if (jsonSchemaFields) {
const type = findTypeByValue(schema, jsonSchemaFields);
return type;
}
}
}
},
[filterStructuredOutput, findTypeByValue, getOperatorTypeFromId],
[
filterStructuredOutput,
findTypeByValue,
getBeginSchema,
getOperatorTypeFromId,
],
);
return findAgentStructuredOutputTypeByValue;
}
// TODO: Consider merging with useFindAgentStructuredOutputLabel
export function useFindAgentStructuredOutputLabelByValue() {
const { getNode } = useGraphStore((state) => state);

View file

@ -314,10 +314,12 @@ export function useFilterQueryVariableOptionsByTypes({
? toLower(y.type).includes(toLower(x))
: toLower(y.type) === toLower(x),
) ||
// agent structured output
isAgentStructured(
y.value,
y.value.slice(-AgentStructuredOutputField.length),
), // agent structured output
) ||
y.value.startsWith(BeginId), // begin node outputs
),
};
})

View file

@ -24,6 +24,7 @@ import {
import pipe from 'lodash/fp/pipe';
import isObject from 'lodash/isObject';
import {
AgentDialogueMode,
CategorizeAnchorPointPositions,
FileType,
FileTypeSuffixMap,
@ -34,6 +35,7 @@ import {
Operator,
TypesWithArray,
} from './constant';
import { BeginFormSchemaType } from './form/begin-form';
import { DataOperationsFormSchemaType } from './form/data-operations-form';
import { ExtractorFormSchemaType } from './form/extractor-form';
import { HierarchicalMergerFormSchemaType } from './form/hierarchical-merger-form';
@ -312,6 +314,41 @@ function transformDataOperationsParams(params: DataOperationsFormSchemaType) {
};
}
export function transformArrayToObject(
list?: Array<{ key: string; value: string }>,
) {
if (!Array.isArray(list)) return {};
return list?.reduce<Record<string, any>>((pre, cur) => {
if (cur.key) {
pre[cur.key] = cur.value;
}
return pre;
}, {});
}
function transformBeginParams(params: BeginFormSchemaType) {
if (params.mode === AgentDialogueMode.Webhook) {
return {
...params,
security: {
...params.security,
ip_whitelist: params.security?.ip_whitelist.map((x) => x.value),
},
response: {
...params.response,
headers_template: transformArrayToObject(
params.response?.headers_template,
),
body_template: transformArrayToObject(params.response?.body_template),
},
};
}
return {
...params,
};
}
// construct a dsl based on the node information of the graph
export const buildDslComponentsByGraph = (
nodes: RAGFlowNodeType[],
@ -361,6 +398,9 @@ export const buildDslComponentsByGraph = (
case Operator.DataOperations:
params = transformDataOperationsParams(params);
break;
case Operator.Begin:
params = transformBeginParams(params);
break;
default:
break;
}

View file

@ -7,14 +7,18 @@ import { z } from 'zod';
import { CrossLanguageFormField } from '@/components/cross-language-form-field';
import { FormContainer } from '@/components/form-container';
import {
initialTopKValue,
MetadataFilter,
MetadataFilterSchema,
} from '@/components/metadata-filter';
import {
RerankFormFields,
initialTopKValue,
topKSchema,
} from '@/components/rerank';
import {
SimilaritySliderFormField,
initialSimilarityThresholdValue,
initialVectorSimilarityWeightValue,
SimilaritySliderFormField,
similarityThresholdSchema,
vectorSimilarityWeightSchema,
} from '@/components/similarity-slider';
@ -33,6 +37,7 @@ import { trim } from 'lodash';
import { Send } from 'lucide-react';
import { useEffect } from 'react';
import { useTranslation } from 'react-i18next';
import { useParams } from 'umi';
type TestingFormProps = Pick<
ReturnType<typeof useTestRetrieval>,
@ -45,6 +50,8 @@ export default function TestingForm({
setValues,
}: TestingFormProps) {
const { t } = useTranslation();
const { id } = useParams();
const knowledgeBaseId = id;
const formSchema = z.object({
question: z.string().min(1, {
@ -54,6 +61,8 @@ export default function TestingForm({
...vectorSimilarityWeightSchema,
...topKSchema,
use_kg: z.boolean().optional(),
kb_ids: z.array(z.string()).optional(),
...MetadataFilterSchema,
});
const form = useForm<z.infer<typeof formSchema>>({
@ -63,6 +72,7 @@ export default function TestingForm({
...initialVectorSimilarityWeightValue,
...initialTopKValue,
use_kg: false,
kb_ids: [knowledgeBaseId],
},
});
@ -90,6 +100,7 @@ export default function TestingForm({
<CrossLanguageFormField
name={'cross_languages'}
></CrossLanguageFormField>
<MetadataFilter prefix=""></MetadataFilter>
</FormContainer>
<FormField
control={form.control}

View file

@ -202,7 +202,7 @@ export default function SearchingView({
<div className="w-full flex flex-col">
<div className="w-full highlightContent">
<ImageWithPopover
id={chunk.img_id}
id={chunk.image_id || chunk.img_id}
></ImageWithPopover>
<Popover>
<PopoverTrigger asChild>