diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py
index a2ac131f3..01e20354f 100644
--- a/api/apps/conversation_app.py
+++ b/api/apps/conversation_app.py
@@ -402,7 +402,7 @@ async def related_questions():
     if "parameter" in gen_conf:
         del gen_conf["parameter"]
     prompt = load_prompt("related_question")
-    ans = chat_mdl.chat(
+    ans = await chat_mdl.async_chat(
         prompt,
         [
             {
diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index ee81be7b7..e94f14fcc 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -788,7 +788,7 @@ Reason:
  - At the same time, related terms can also help search engines better understand user needs and return more accurate search results.
 
 """
-    ans = chat_mdl.chat(
+    ans = await chat_mdl.async_chat(
         prompt,
         [
             {
@@ -1070,7 +1070,7 @@ async def related_questions_embedded():
 
     gen_conf = search_config.get("llm_setting", {"temperature": 0.9})
     prompt = load_prompt("related_question")
-    ans = chat_mdl.chat(
+    ans = await chat_mdl.async_chat(
         prompt,
         [
             {
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 1f38292ba..953b73942 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1765,12 +1765,17 @@ class LiteLLMBase(ABC):
 
             yield ans, tol
 
-    async def async_chat(self, history, gen_conf, **kwargs):
-        logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2))
+    async def async_chat(self, system, history, gen_conf, **kwargs):
+        hist = list(history) if history else []
+        if system:
+            if not hist or hist[0].get("role") != "system":
+                hist.insert(0, {"role": "system", "content": system})
+
+        logging.info("[HISTORY]" + json.dumps(hist, ensure_ascii=False, indent=2))
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
 
-        completion_args = self._construct_completion_args(history=history, stream=False, tools=False, **gen_conf)
+        completion_args = self._construct_completion_args(history=hist, stream=False, tools=False, **gen_conf)
 
         for attempt in range(self.max_retries + 1):
             try: