Merge branch 'main' into main
commit 7a499e3771
53 changed files with 2213 additions and 220 deletions
@@ -51,7 +51,9 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
    apt install -y libjemalloc-dev && \
    apt install -y python3-pip pipx nginx unzip curl wget git vim less && \
    apt install -y ghostscript
    apt install -y ghostscript && \
    apt install -y pandoc && \
    apt install -y texlive

RUN if [ "$NEED_MIRROR" == "1" ]; then \
    pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
@@ -393,7 +393,9 @@ class AdminCLI(Cmd):
            print(f"Can't access {self.host}, port: {self.port}")

    def _format_service_detail_table(self, data):
        if not any([isinstance(v, list) for v in data.values()]):
        if isinstance(data, list):
            return data
        if not all([isinstance(v, list) for v in data.values()]):
            # normal table
            return data
        # handle task_executor heartbeats map, for example {'name': [{'done': 2, 'now': timestamp1}, {'done': 3, 'now': timestamp2}]}
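The heartbeats map described in the comment above is flattened into one table row per task executor. A minimal standalone sketch of that reshaping (the sample data and row layout are illustrative, not taken from the service):

heartbeats_map = {
    "executor-1": [{"done": 2, "now": 1700000000}, {"done": 3, "now": 1700000060}],
    "executor-2": [],
}

rows = []
for name, heartbeats in heartbeats_map.items():
    # keep the first heartbeat entry for the row, or just the name when none exist
    rows.append({"task_executor_name": name, **heartbeats[0]} if heartbeats else {"task_executor_name": name})

# rows -> [{'task_executor_name': 'executor-1', 'done': 2, 'now': 1700000000},
#          {'task_executor_name': 'executor-2'}]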
@@ -404,7 +406,7 @@ class AdminCLI(Cmd):
            task_executor_list.append({
                "task_executor_name": k,
                **heartbeats[0],
            })
            } if heartbeats else {"task_executor_name": k})
        return task_executor_list

    def _print_table_simple(self, data):
@@ -415,7 +417,8 @@ class AdminCLI(Cmd):
            # handle single row data
            data = [data]

        columns = list(data[0].keys())
        columns = list(set().union(*(d.keys() for d in data)))
        columns.sort()
        col_widths = {}

        def get_string_width(text):
@@ -406,6 +406,10 @@ class Canvas(Graph):
                else:
                    yield decorate("message", {"content": cpn_obj.output("content")})
                cite = re.search(r"\[ID:[ 0-9]+\]", cpn_obj.output("content"))

                if isinstance(cpn_obj.output("attachment"), tuple):
                    yield decorate("message", {"attachment": cpn_obj.output("attachment")})

                yield decorate("message_end", {"reference": self.get_reference() if cite else None})

            while partials:
@@ -463,12 +463,15 @@ class ComponentBase(ABC):
        return self._param.outputs.get("_ERROR", {}).get("value")

    def reset(self, only_output=False):
        for k in self._param.outputs.keys():
            self._param.outputs[k]["value"] = None
        outputs: dict = self._param.outputs  # for better performance
        for k in outputs.keys():
            outputs[k]["value"] = None
        if only_output:
            return
        for k in self._param.inputs.keys():
            self._param.inputs[k]["value"] = None

        inputs: dict = self._param.inputs  # for better performance
        for k in inputs.keys():
            inputs[k]["value"] = None
        self._param.debug_inputs = {}

    def get_input(self, key: str=None) -> Union[Any, dict[str, Any]]:
agent/component/list_operations.py (new file, 166 lines)
@@ -0,0 +1,166 @@
from abc import ABC
import os
from agent.component.base import ComponentBase, ComponentParamBase
from api.utils.api_utils import timeout


class ListOperationsParam(ComponentParamBase):
    """
    Define the List Operations component parameters.
    """
    def __init__(self):
        super().__init__()
        self.query = ""
        self.operations = "topN"
        self.n = 0
        self.sort_method = "asc"
        self.filter = {
            "operator": "=",
            "value": ""
        }
        self.outputs = {
            "result": {
                "value": [],
                "type": "Array of ?"
            },
            "first": {
                "value": "",
                "type": "?"
            },
            "last": {
                "value": "",
                "type": "?"
            }
        }

    def check(self):
        self.check_empty(self.query, "query")
        self.check_valid_value(self.operations, "Support operations", ["topN", "head", "tail", "filter", "sort", "drop_duplicates"])

    def get_input_form(self) -> dict[str, dict]:
        return {}


class ListOperations(ComponentBase, ABC):
    component_name = "ListOperations"

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
    def _invoke(self, **kwargs):
        self.input_objects = []
        inputs = getattr(self._param, "query", None)
        self.inputs = self._canvas.get_variable_value(inputs)
        self.set_input_value(inputs, self.inputs)
        if self._param.operations == "topN":
            self._topN()
        elif self._param.operations == "head":
            self._head()
        elif self._param.operations == "tail":
            self._tail()
        elif self._param.operations == "filter":
            self._filter()
        elif self._param.operations == "sort":
            self._sort()
        elif self._param.operations == "drop_duplicates":
            self._drop_duplicates()

    def _coerce_n(self):
        try:
            return int(getattr(self._param, "n", 0))
        except Exception:
            return 0

    def _set_outputs(self, outputs):
        self._param.outputs["result"]["value"] = outputs
        self._param.outputs["first"]["value"] = outputs[0] if outputs else None
        self._param.outputs["last"]["value"] = outputs[-1] if outputs else None

    def _topN(self):
        n = self._coerce_n()
        if n < 1:
            outputs = []
        else:
            n = min(n, len(self.inputs))
            outputs = self.inputs[:n]
        self._set_outputs(outputs)

    def _head(self):
        n = self._coerce_n()
        if 1 <= n <= len(self.inputs):
            outputs = [self.inputs[n - 1]]
        else:
            outputs = []
        self._set_outputs(outputs)

    def _tail(self):
        n = self._coerce_n()
        if 1 <= n <= len(self.inputs):
            outputs = [self.inputs[-n]]
        else:
            outputs = []
        self._set_outputs(outputs)

    def _filter(self):
        self._set_outputs([i for i in self.inputs if self._eval(self._norm(i), self._param.filter["operator"], self._param.filter["value"])])

    def _norm(self, v):
        s = "" if v is None else str(v)
        return s

    def _eval(self, v, operator, value):
        if operator == "=":
            return v == value
        elif operator == "≠":
            return v != value
        elif operator == "contains":
            return value in v
        elif operator == "start with":
            return v.startswith(value)
        elif operator == "end with":
            return v.endswith(value)
        else:
            return False

    def _sort(self):
        items = self.inputs or []
        method = getattr(self._param, "sort_method", "asc") or "asc"
        reverse = method == "desc"

        if not items:
            self._set_outputs([])
            return

        first = items[0]

        if isinstance(first, dict):
            outputs = sorted(
                items,
                key=lambda x: self._hashable(x),
                reverse=reverse,
            )
        else:
            outputs = sorted(items, reverse=reverse)

        self._set_outputs(outputs)

    def _drop_duplicates(self):
        seen = set()
        outs = []
        for item in self.inputs:
            k = self._hashable(item)
            if k in seen:
                continue
            seen.add(k)
            outs.append(item)
        self._set_outputs(outs)

    def _hashable(self, x):
        if isinstance(x, dict):
            return tuple(sorted((k, self._hashable(v)) for k, v in x.items()))
        if isinstance(x, (list, tuple)):
            return tuple(self._hashable(v) for v in x)
        if isinstance(x, set):
            return tuple(sorted(self._hashable(v) for v in x))
        return x

    def thoughts(self) -> str:
        return "ListOperation in progress"
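A quick illustration of the operation semantics implemented above, using plain Python rather than the component plumbing (the sample list and parameter values are made up for the example):

items = ["alpha", "beta", "alpha", "gamma"]

# topN with n=2 -> first two items
top2 = items[:2]                        # ['alpha', 'beta']

# head/tail are 1-based single-element selections
head_3 = [items[3 - 1]]                 # ['alpha']
tail_1 = [items[-1]]                    # ['gamma']

# filter with operator "start with" and value "a"
filtered = [i for i in items if str(i).startswith("a")]   # ['alpha', 'alpha']

# drop_duplicates keeps the first occurrence of each value
seen, deduped = set(), []
for item in items:
    if item not in seen:
        seen.add(item)
        deduped.append(item)            # ['alpha', 'beta', 'gamma']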
@@ -17,6 +17,9 @@ import json
import os
import random
import re
import pypandoc
import logging
import tempfile
from functools import partial
from typing import Any
@@ -24,7 +27,8 @@ from agent.component.base import ComponentBase, ComponentParamBase
from jinja2 import Template as Jinja2Template

from common.connection_utils import timeout

from common.misc_utils import get_uuid
from common import settings

class MessageParam(ComponentParamBase):
    """
@@ -34,6 +38,7 @@ class MessageParam(ComponentParamBase):
        super().__init__()
        self.content = []
        self.stream = True
        self.output_format = None  # default output format
        self.outputs = {
            "content": {
                "type": "str"
@@ -133,6 +138,7 @@ class Message(ComponentBase):
            yield rand_cnt[s:]

        self.set_output("content", all_content)
        self._convert_content(all_content)

    def _is_jinjia2(self, content: str) -> bool:
        patt = [
@@ -164,6 +170,68 @@ class Message(ComponentBase):
            content = re.sub(n, v, content)

        self.set_output("content", content)
        self._convert_content(content)

    def thoughts(self) -> str:
        return ""

    def _convert_content(self, content):
        doc_id = get_uuid()

        if self._param.output_format.lower() not in {"markdown", "html", "pdf", "docx"}:
            self._param.output_format = "markdown"

        try:
            if self._param.output_format in {"markdown", "html"}:
                if isinstance(content, str):
                    converted = pypandoc.convert_text(
                        content,
                        to=self._param.output_format,
                        format="markdown",
                    )
                else:
                    converted = pypandoc.convert_file(
                        content,
                        to=self._param.output_format,
                        format="markdown",
                    )

                binary_content = converted.encode("utf-8")

            else:  # pdf, docx
                with tempfile.NamedTemporaryFile(suffix=f".{self._param.output_format}", delete=False) as tmp:
                    tmp_name = tmp.name

                try:
                    if isinstance(content, str):
                        pypandoc.convert_text(
                            content,
                            to=self._param.output_format,
                            format="markdown",
                            outputfile=tmp_name,
                        )
                    else:
                        pypandoc.convert_file(
                            content,
                            to=self._param.output_format,
                            format="markdown",
                            outputfile=tmp_name,
                        )

                    with open(tmp_name, "rb") as f:
                        binary_content = f.read()

                finally:
                    if os.path.exists(tmp_name):
                        os.remove(tmp_name)

            settings.STORAGE_IMPL.put(self._canvas._tenant_id, doc_id, binary_content)
            self.set_output("attachment", {
                "doc_id": doc_id,
                "format": self._param.output_format,
                "file_name": f"{doc_id[:8]}.{self._param.output_format}"})

            logging.info(f"Converted content uploaded as {doc_id} (format={self._param.output_format})")

        except Exception as e:
            logging.error(f"Error converting content to {self._param.output_format}: {e}")
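For reference, the pypandoc calls used above behave roughly as in this standalone sketch (a pandoc binary must be on the PATH; the sample markdown and file names are illustrative):

import tempfile
import pypandoc

md = "# Title\n\nSome **bold** text."

# markdown/html targets return the converted text directly
html = pypandoc.convert_text(md, to="html", format="markdown")

# binary targets such as docx/pdf must be written via outputfile
with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
    pypandoc.convert_text(md, to="docx", format="markdown", outputfile=tmp.name)
    out_path = tmp.name  # the binary payload can then be read back from out_path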
@@ -509,6 +509,7 @@ async def get(doc_id):
        ext = ext.group(1) if ext else None
        if ext:
            if doc.type == FileType.VISUAL.value:
                content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
            else:
                content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
@@ -518,6 +519,22 @@ async def get(doc_id):
        return server_error_response(e)


@manager.route("/download/<attachment_id>", methods=["GET"])  # noqa: F821
@login_required
def download_attachment(attachment_id):
    try:
        ext = request.args.get("ext", "markdown")
        data = settings.STORAGE_IMPL.get(current_user.id, attachment_id)
        # data = settings.STORAGE_IMPL.get("eb500d50bb0411f0907561d2782adda5", attachment_id)
        response = flask.make_response(data)
        response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))

        return response

    except Exception as e:
        return server_error_response(e)


@manager.route("/change_parser", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_id")
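A minimal client-side sketch for the download route added above (the host, route prefix, attachment id, and session cookie are placeholders and assumptions, not taken from the diff; the endpoint requires a logged-in session):

import requests

BASE_URL = "http://localhost:9380"            # assumed RAGFlow web host
ATTACHMENT_ID = "<doc_id returned by the Message component>"

resp = requests.get(
    f"{BASE_URL}/v1/document/download/{ATTACHMENT_ID}",   # prefix assumed from the document blueprint
    params={"ext": "docx"},                   # looked up in CONTENT_TYPE_MAP server-side
    cookies={"session": "<login session cookie>"},
)
resp.raise_for_status()
with open(f"{ATTACHMENT_ID}.docx", "wb") as f:
    f.write(resp.content)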
@@ -305,6 +305,7 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
                    time.sleep(self.retry_delay * (2 ** attempt))
                else:
                    raise
        return None


class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
@@ -772,7 +773,7 @@ class Document(DataBaseModel):
    thumbnail = TextField(null=True, help_text="thumbnail base64 string")
    kb_id = CharField(max_length=256, null=False, index=True)
    parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True)
    pipeline_id = CharField(max_length=32, null=True, help_text="pipleline ID", index=True)
    pipeline_id = CharField(max_length=32, null=True, help_text="pipeline ID", index=True)
    parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
    source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True)
    type = CharField(max_length=32, null=False, help_text="file extension", index=True)
@@ -876,7 +877,7 @@ class Dialog(DataBaseModel):
class Conversation(DataBaseModel):
    id = CharField(max_length=32, primary_key=True)
    dialog_id = CharField(max_length=32, null=False, index=True)
    name = CharField(max_length=255, null=True, help_text="converastion name", index=True)
    name = CharField(max_length=255, null=True, help_text="conversation name", index=True)
    message = JSONField(null=True)
    reference = JSONField(null=True, default=[])
    user_id = CharField(max_length=255, null=True, help_text="user_id", index=True)
@@ -70,7 +70,7 @@ class ConnectorService(CommonService):
    def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str):
        e, conn = cls.get_by_id(connector_id)
        if not e:
            return
            return None
        SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id])
        docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}", kb_id=kb_id)
        err = FileService.delete_docs([d.id for d in docs], tenant_id)
@@ -125,11 +125,11 @@ class SyncLogsService(CommonService):
        )

        query = query.distinct().order_by(cls.model.update_time.desc())
        totbal = query.count()
        total = query.count()
        if page_number:
            query = query.paginate(page_number, items_per_page)

        return list(query.dicts()), totbal
        return list(query.dicts()), total

    @classmethod
    def start(cls, id, connector_id):
@@ -342,7 +342,7 @@ def chat(dialog, messages, stream=True, **kwargs):
    if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"):
        for ans in chat_solo(dialog, messages, stream):
            yield ans
        return
        return None

    chat_start_ts = timer()
@@ -386,7 +386,7 @@ def chat(dialog, messages, stream=True, **kwargs):
        ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True), dialog.kb_ids)
        if ans:
            yield ans
            return
            return None

    for p in prompt_config["parameters"]:
        if p["key"] == "knowledge":
@@ -617,6 +617,8 @@ def chat(dialog, messages, stream=True, **kwargs):
            res["audio_binary"] = tts(tts_mdl, answer)
        yield res

    return None


def use_sql(question, field_map, tenant_id, chat_mdl, quota=True, kb_ids=None):
    sys_prompt = """
@@ -745,7 +747,7 @@ Please write the SQL, only SQL, without any other explanations or text.

def tts(tts_mdl, text):
    if not tts_mdl or not text:
        return
        return None
    bin = b""
    for chunk in tts_mdl.tts(text):
        bin += chunk
@@ -113,7 +113,7 @@ class DocumentService(CommonService):
    def check_doc_health(cls, tenant_id: str, filename):
        import os
        MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
        if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(tenant_id) >= MAX_FILE_NUM_PER_USER:
        if 0 < MAX_FILE_NUM_PER_USER <= DocumentService.get_doc_count(tenant_id):
            raise RuntimeError("Exceed the maximum file number of a free user!")
        if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
            raise RuntimeError("Exceed the maximum length of file name!")
@@ -464,7 +464,7 @@ class DocumentService(CommonService):
            cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
        docs = docs.dicts()
        if not docs:
            return
            return None
        return docs[0]["tenant_id"]

    @classmethod
@@ -473,7 +473,7 @@ class DocumentService(CommonService):
        docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id)
        docs = docs.dicts()
        if not docs:
            return
            return None
        return docs[0]["kb_id"]

    @classmethod
@@ -486,7 +486,7 @@ class DocumentService(CommonService):
            cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)
        docs = docs.dicts()
        if not docs:
            return
            return None
        return docs[0]["tenant_id"]

    @classmethod
@@ -533,7 +533,7 @@ class DocumentService(CommonService):
            cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
        docs = docs.dicts()
        if not docs:
            return
            return None
        return docs[0]["embd_id"]

    @classmethod
@@ -569,7 +569,7 @@ class DocumentService(CommonService):
            .where(cls.model.name == doc_name)
        doc_id = doc_id.dicts()
        if not doc_id:
            return
            return None
        return doc_id[0]["id"]

    @classmethod
@@ -715,7 +715,7 @@ class DocumentService(CommonService):
            prg = 1
            status = TaskStatus.DONE.value

        # only for special task and parsed docs and unfinised
        # only for special task and parsed docs and unfinished
        freeze_progress = special_task_running and doc_progress >= 1 and not finished
        msg = "\n".join(sorted(msg))
        info = {
@@ -974,13 +974,13 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):

    def embedding(doc_id, cnts, batch_size=16):
        nonlocal embd_mdl, chunk_counts, token_counts
        vects = []
        vectors = []
        for i in range(0, len(cnts), batch_size):
            vts, c = embd_mdl.encode(cnts[i: i + batch_size])
            vects.extend(vts.tolist())
            vectors.extend(vts.tolist())
            chunk_counts[doc_id] += len(cnts[i:i + batch_size])
            token_counts[doc_id] += c
        return vects
        return vectors

    idxnm = search.index_name(kb.tenant_id)
    try_create_idx = True
@@ -1011,15 +1011,15 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
        except Exception:
            logging.exception("Mind map generation error")

    vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
    assert len(cks) == len(vects)
    vectors = embedding(doc_id, [c["content_with_weight"] for c in cks])
    assert len(cks) == len(vectors)
    for i, d in enumerate(cks):
        v = vects[i]
        v = vectors[i]
        d["q_%d_vec" % len(v)] = v
    for b in range(0, len(cks), es_bulk_size):
        if try_create_idx:
            if not settings.docStoreConn.indexExist(idxnm, kb_id):
                settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
                settings.docStoreConn.createIdx(idxnm, kb_id, len(vectors[0]))
            try_create_idx = False
        settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
@@ -424,6 +424,7 @@ class KnowledgebaseService(CommonService):

        # Default parser_config (align with kb_app.create) — do not accept external overrides
        payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config"))

        return payload
@@ -1,3 +1,19 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Reusable HTML email templates and registry.
"""
@@ -173,7 +173,8 @@ def check_task_executor_alive():
            heartbeats = [json.loads(heartbeat) for heartbeat in heartbeats]
            task_executor_heartbeats[task_executor_id] = heartbeats
        if task_executor_heartbeats:
            return {"status": "alive", "message": task_executor_heartbeats}
            status = "alive" if any(task_executor_heartbeats.values()) else "timeout"
            return {"status": status, "message": task_executor_heartbeats}
        else:
            return {"status": "timeout", "message": "Not found any task executor."}
    except Exception as e:
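The changed status logic above reports "alive" only when at least one executor has a non-empty heartbeat list; a small illustration of the expression with made-up data:

task_executor_heartbeats = {"executor-1": [], "executor-2": [{"done": 5, "now": 1700000000}]}
status = "alive" if any(task_executor_heartbeats.values()) else "timeout"   # "alive"

# a map that only contains empty heartbeat lists now yields "timeout" instead of "alive"
status = "alive" if any({"executor-1": []}.values()) else "timeout"         # "timeout"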
@@ -1,3 +1,19 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import datetime
import json
from enum import Enum, IntEnum
@@ -11,7 +11,7 @@ from .confluence_connector import ConfluenceConnector
from .discord_connector import DiscordConnector
from .dropbox_connector import DropboxConnector
from .google_drive.connector import GoogleDriveConnector
from .jira_connector import JiraConnector
from .jira.connector import JiraConnector
from .sharepoint_connector import SharePointConnector
from .teams_connector import TeamsConnector
from .config import BlobType, DocumentSource
@@ -13,6 +13,7 @@ def get_current_tz_offset() -> int:
    return round(time_diff.total_seconds() / 3600)


ONE_MINUTE = 60
ONE_HOUR = 3600
ONE_DAY = ONE_HOUR * 24
@@ -42,6 +43,7 @@ class DocumentSource(str, Enum):
    OCI_STORAGE = "oci_storage"
    SLACK = "slack"
    CONFLUENCE = "confluence"
    JIRA = "jira"
    GOOGLE_DRIVE = "google_drive"
    GMAIL = "gmail"
    DISCORD = "discord"
@@ -178,6 +180,21 @@ GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD = int(
    os.environ.get("GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD", 10 * 1024 * 1024)
)

JIRA_CONNECTOR_LABELS_TO_SKIP = [
    ignored_tag
    for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
    if ignored_tag
]
JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
    os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
)
JIRA_SYNC_TIME_BUFFER_SECONDS = int(
    os.environ.get("JIRA_SYNC_TIME_BUFFER_SECONDS", ONE_MINUTE)
)
JIRA_TIMEZONE_OFFSET = float(
    os.environ.get("JIRA_TIMEZONE_OFFSET", get_current_tz_offset())
)

OAUTH_SLACK_CLIENT_ID = os.environ.get("OAUTH_SLACK_CLIENT_ID", "")
OAUTH_SLACK_CLIENT_SECRET = os.environ.get("OAUTH_SLACK_CLIENT_SECRET", "")
OAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get(
@@ -1788,6 +1788,7 @@ class ConfluenceConnector(
        cql_url = self.confluence_client.build_cql_url(
            page_query, expand=",".join(_PAGE_EXPANSION_FIELDS)
        )
        logging.info(f"[Confluence Connector] Building CQL URL {cql_url}")
        return update_param_in_path(cql_url, "limit", str(limit))

    @override
common/data_source/jira/__init__.py (new file, 0 lines)
common/data_source/jira/connector.py (new file, 973 lines)
@ -0,0 +1,973 @@
|
|||
"""Checkpointed Jira connector that emits markdown blobs for each issue."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import copy
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
||||
|
||||
from jira import JIRA
|
||||
from jira.resources import Issue
|
||||
from pydantic import Field
|
||||
|
||||
from common.data_source.config import (
|
||||
INDEX_BATCH_SIZE,
|
||||
JIRA_CONNECTOR_LABELS_TO_SKIP,
|
||||
JIRA_CONNECTOR_MAX_TICKET_SIZE,
|
||||
JIRA_TIMEZONE_OFFSET,
|
||||
ONE_HOUR,
|
||||
DocumentSource,
|
||||
)
|
||||
from common.data_source.exceptions import (
|
||||
ConnectorMissingCredentialError,
|
||||
ConnectorValidationError,
|
||||
InsufficientPermissionsError,
|
||||
UnexpectedValidationError,
|
||||
)
|
||||
from common.data_source.interfaces import (
|
||||
CheckpointedConnectorWithPermSync,
|
||||
CheckpointOutputWrapper,
|
||||
SecondsSinceUnixEpoch,
|
||||
SlimConnectorWithPermSync,
|
||||
)
|
||||
from common.data_source.jira.utils import (
|
||||
JIRA_CLOUD_API_VERSION,
|
||||
JIRA_SERVER_API_VERSION,
|
||||
build_issue_url,
|
||||
extract_body_text,
|
||||
extract_named_value,
|
||||
extract_user,
|
||||
format_attachments,
|
||||
format_comments,
|
||||
parse_jira_datetime,
|
||||
should_skip_issue,
|
||||
)
|
||||
from common.data_source.models import (
|
||||
ConnectorCheckpoint,
|
||||
ConnectorFailure,
|
||||
Document,
|
||||
DocumentFailure,
|
||||
SlimDocument,
|
||||
)
|
||||
from common.data_source.utils import is_atlassian_cloud_url, is_atlassian_date_error, scoped_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_FIELDS = "summary,description,updated,created,status,priority,assignee,reporter,labels,issuetype,project,comment,attachment"
|
||||
_SLIM_FIELDS = "key,project"
|
||||
_MAX_RESULTS_FETCH_IDS = 5000
|
||||
_JIRA_SLIM_PAGE_SIZE = 500
|
||||
_JIRA_FULL_PAGE_SIZE = 50
|
||||
_DEFAULT_ATTACHMENT_SIZE_LIMIT = 10 * 1024 * 1024 # 10MB
|
||||
|
||||
|
||||
class JiraCheckpoint(ConnectorCheckpoint):
|
||||
"""Checkpoint that tracks which slice of the current JQL result set was emitted."""
|
||||
|
||||
start_at: int = 0
|
||||
cursor: str | None = None
|
||||
ids_done: bool = False
|
||||
all_issue_ids: list[list[str]] = Field(default_factory=list)
|
||||
|
||||
|
||||
_TZ_OFFSET_PATTERN = re.compile(r"([+-])(\d{2})(:?)(\d{2})$")
|
||||
|
||||
|
||||
class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
|
||||
"""Retrieve Jira issues and emit them as markdown documents."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
jira_base_url: str,
|
||||
project_key: str | None = None,
|
||||
jql_query: str | None = None,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
include_comments: bool = True,
|
||||
include_attachments: bool = False,
|
||||
labels_to_skip: Sequence[str] | None = None,
|
||||
comment_email_blacklist: Sequence[str] | None = None,
|
||||
scoped_token: bool = False,
|
||||
attachment_size_limit: int | None = None,
|
||||
timezone_offset: float | None = None,
|
||||
) -> None:
|
||||
if not jira_base_url:
|
||||
raise ConnectorValidationError("Jira base URL must be provided.")
|
||||
|
||||
self.jira_base_url = jira_base_url.rstrip("/")
|
||||
self.project_key = project_key
|
||||
self.jql_query = jql_query
|
||||
self.batch_size = batch_size
|
||||
self.include_comments = include_comments
|
||||
self.include_attachments = include_attachments
|
||||
configured_labels = labels_to_skip or JIRA_CONNECTOR_LABELS_TO_SKIP
|
||||
self.labels_to_skip = {label.lower() for label in configured_labels}
|
||||
self.comment_email_blacklist = {email.lower() for email in comment_email_blacklist or []}
|
||||
self.scoped_token = scoped_token
|
||||
self.jira_client: JIRA | None = None
|
||||
|
||||
self.max_ticket_size = JIRA_CONNECTOR_MAX_TICKET_SIZE
|
||||
self.attachment_size_limit = attachment_size_limit if attachment_size_limit and attachment_size_limit > 0 else _DEFAULT_ATTACHMENT_SIZE_LIMIT
|
||||
self._fields_param = _DEFAULT_FIELDS
|
||||
self._slim_fields = _SLIM_FIELDS
|
||||
|
||||
tz_offset_value = float(timezone_offset) if timezone_offset is not None else float(JIRA_TIMEZONE_OFFSET)
|
||||
self.timezone_offset = tz_offset_value
|
||||
self.timezone = timezone(offset=timedelta(hours=tz_offset_value))
|
||||
self._timezone_overridden = timezone_offset is not None
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Connector lifecycle helpers
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
"""Instantiate the Jira client using either an API token or username/password."""
|
||||
jira_url_for_client = self.jira_base_url
|
||||
if self.scoped_token:
|
||||
if is_atlassian_cloud_url(self.jira_base_url):
|
||||
try:
|
||||
jira_url_for_client = scoped_url(self.jira_base_url, "jira")
|
||||
except ValueError as exc:
|
||||
raise ConnectorValidationError(str(exc)) from exc
|
||||
else:
|
||||
logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.")
|
||||
|
||||
user_email = credentials.get("jira_user_email") or credentials.get("username")
|
||||
api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
|
||||
password = credentials.get("jira_password") or credentials.get("password")
|
||||
rest_api_version = credentials.get("rest_api_version")
|
||||
|
||||
if not rest_api_version:
|
||||
rest_api_version = JIRA_CLOUD_API_VERSION if api_token else JIRA_SERVER_API_VERSION
|
||||
options: dict[str, Any] = {"rest_api_version": rest_api_version}
|
||||
|
||||
try:
|
||||
if user_email and api_token:
|
||||
self.jira_client = JIRA(
|
||||
server=jira_url_for_client,
|
||||
basic_auth=(user_email, api_token),
|
||||
options=options,
|
||||
)
|
||||
elif api_token:
|
||||
self.jira_client = JIRA(
|
||||
server=jira_url_for_client,
|
||||
token_auth=api_token,
|
||||
options=options,
|
||||
)
|
||||
elif user_email and password:
|
||||
self.jira_client = JIRA(
|
||||
server=jira_url_for_client,
|
||||
basic_auth=(user_email, password),
|
||||
options=options,
|
||||
)
|
||||
else:
|
||||
raise ConnectorMissingCredentialError("Jira credentials must include either an API token or username/password.")
|
||||
except Exception as exc: # pragma: no cover - jira lib raises many types
|
||||
raise ConnectorMissingCredentialError(f"Jira: {exc}") from exc
|
||||
self._sync_timezone_from_server()
|
||||
return None
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""Validate connectivity by fetching basic Jira info."""
|
||||
if not self.jira_client:
|
||||
raise ConnectorMissingCredentialError("Jira")
|
||||
|
||||
try:
|
||||
if self.jql_query:
|
||||
dummy_checkpoint = self.build_dummy_checkpoint()
|
||||
checkpoint_callback = self._make_checkpoint_callback(dummy_checkpoint)
|
||||
iterator = self._perform_jql_search(
|
||||
jql=self.jql_query,
|
||||
start=0,
|
||||
max_results=1,
|
||||
fields="key",
|
||||
all_issue_ids=dummy_checkpoint.all_issue_ids,
|
||||
checkpoint_callback=checkpoint_callback,
|
||||
next_page_token=dummy_checkpoint.cursor,
|
||||
ids_done=dummy_checkpoint.ids_done,
|
||||
)
|
||||
next(iter(iterator), None)
|
||||
elif self.project_key:
|
||||
self.jira_client.project(self.project_key)
|
||||
else:
|
||||
self.jira_client.projects()
|
||||
except Exception as exc: # pragma: no cover - dependent on Jira responses
|
||||
self._handle_validation_error(exc)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Checkpointed connector implementation
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def load_from_checkpoint(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch,
|
||||
end: SecondsSinceUnixEpoch,
|
||||
checkpoint: JiraCheckpoint,
|
||||
) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
|
||||
"""Load Jira issues, emitting a Document per issue."""
|
||||
try:
|
||||
return (yield from self._load_with_retry(start, end, checkpoint))
|
||||
except Exception as exc:
|
||||
logger.exception(f"[Jira] Jira query ultimately failed: {exc}")
|
||||
yield ConnectorFailure(
|
||||
failure_message=f"Failed to query Jira: {exc}",
|
||||
exception=exc,
|
||||
)
|
||||
return JiraCheckpoint(has_more=False, start_at=checkpoint.start_at)
|
||||
|
||||
def load_from_checkpoint_with_perm_sync(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch,
|
||||
end: SecondsSinceUnixEpoch,
|
||||
checkpoint: JiraCheckpoint,
|
||||
) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
|
||||
"""Permissions are not synced separately, so reuse the standard loader."""
|
||||
return (yield from self.load_from_checkpoint(start=start, end=end, checkpoint=checkpoint))
|
||||
|
||||
def _load_with_retry(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch,
|
||||
end: SecondsSinceUnixEpoch,
|
||||
checkpoint: JiraCheckpoint,
|
||||
) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
|
||||
if not self.jira_client:
|
||||
raise ConnectorMissingCredentialError("Jira")
|
||||
|
||||
attempt_start = start
|
||||
retried_with_buffer = False
|
||||
attempt = 0
|
||||
|
||||
while True:
|
||||
attempt += 1
|
||||
jql = self._build_jql(attempt_start, end)
|
||||
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}")
|
||||
try:
|
||||
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
|
||||
except Exception as exc:
|
||||
if attempt_start is not None and not retried_with_buffer and is_atlassian_date_error(exc):
|
||||
attempt_start = attempt_start - ONE_HOUR
|
||||
retried_with_buffer = True
|
||||
logger.info(f"[Jira] Atlassian date error detected; retrying with start={attempt_start}.")
|
||||
continue
|
||||
raise
|
||||
|
||||
def _handle_validation_error(self, exc: Exception) -> None:
|
||||
status_code = getattr(exc, "status_code", None)
|
||||
if status_code == 401:
|
||||
raise InsufficientPermissionsError("Jira credential appears to be invalid or expired (HTTP 401).") from exc
|
||||
if status_code == 403:
|
||||
raise InsufficientPermissionsError("Jira token does not have permission to access the requested resources (HTTP 403).") from exc
|
||||
if status_code == 404:
|
||||
raise ConnectorValidationError("Jira resource not found (HTTP 404).") from exc
|
||||
if status_code == 429:
|
||||
raise ConnectorValidationError("Jira rate limit exceeded during validation (HTTP 429).") from exc
|
||||
|
||||
message = getattr(exc, "text", str(exc))
|
||||
if not message:
|
||||
raise UnexpectedValidationError("Unexpected Jira validation error.") from exc
|
||||
|
||||
raise ConnectorValidationError(f"Jira validation failed: {message}") from exc
|
||||
|
||||
def _load_from_checkpoint_internal(
|
||||
self,
|
||||
jql: str,
|
||||
checkpoint: JiraCheckpoint,
|
||||
start_filter: SecondsSinceUnixEpoch | None = None,
|
||||
) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
|
||||
assert self.jira_client, "load_credentials must be called before loading issues."
|
||||
|
||||
page_size = self._full_page_size()
|
||||
new_checkpoint = copy.deepcopy(checkpoint)
|
||||
starting_offset = new_checkpoint.start_at or 0
|
||||
current_offset = starting_offset
|
||||
checkpoint_callback = self._make_checkpoint_callback(new_checkpoint)
|
||||
|
||||
issue_iter = self._perform_jql_search(
|
||||
jql=jql,
|
||||
start=current_offset,
|
||||
max_results=page_size,
|
||||
fields=self._fields_param,
|
||||
all_issue_ids=new_checkpoint.all_issue_ids,
|
||||
checkpoint_callback=checkpoint_callback,
|
||||
next_page_token=new_checkpoint.cursor,
|
||||
ids_done=new_checkpoint.ids_done,
|
||||
)
|
||||
|
||||
start_cutoff = float(start_filter) if start_filter is not None else None
|
||||
|
||||
for issue in issue_iter:
|
||||
current_offset += 1
|
||||
issue_key = getattr(issue, "key", "unknown")
|
||||
if should_skip_issue(issue, self.labels_to_skip):
|
||||
continue
|
||||
|
||||
issue_updated = parse_jira_datetime(issue.raw.get("fields", {}).get("updated"))
|
||||
if start_cutoff is not None and issue_updated is not None and issue_updated.timestamp() <= start_cutoff:
|
||||
# Jira JQL only supports minute precision, so we discard already-processed
|
||||
# issues here based on the original second-level cutoff.
|
||||
continue
|
||||
|
||||
try:
|
||||
document = self._issue_to_document(issue)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.exception(f"[Jira] Failed to convert Jira issue {issue_key}: {exc}")
|
||||
yield ConnectorFailure(
|
||||
failure_message=f"Failed to convert Jira issue {issue_key}: {exc}",
|
||||
failed_document=DocumentFailure(
|
||||
document_id=issue_key,
|
||||
document_link=build_issue_url(self.jira_base_url, issue_key),
|
||||
),
|
||||
exception=exc,
|
||||
)
|
||||
continue
|
||||
|
||||
if document is not None:
|
||||
yield document
|
||||
if self.include_attachments:
|
||||
for attachment_document in self._attachment_documents(issue):
|
||||
if attachment_document is not None:
|
||||
yield attachment_document
|
||||
|
||||
self._update_checkpoint_for_next_run(
|
||||
checkpoint=new_checkpoint,
|
||||
current_offset=current_offset,
|
||||
starting_offset=starting_offset,
|
||||
page_size=page_size,
|
||||
)
|
||||
new_checkpoint.start_at = current_offset
|
||||
return new_checkpoint
|
||||
|
||||
def build_dummy_checkpoint(self) -> JiraCheckpoint:
|
||||
"""Create an empty checkpoint used to kick off ingestion."""
|
||||
return JiraCheckpoint(has_more=True, start_at=0)
|
||||
|
||||
def validate_checkpoint_json(self, checkpoint_json: str) -> JiraCheckpoint:
|
||||
"""Validate a serialized checkpoint."""
|
||||
return JiraCheckpoint.model_validate_json(checkpoint_json)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Slim connector implementation
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def retrieve_all_slim_docs_perm_sync(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch | None = None,
|
||||
end: SecondsSinceUnixEpoch | None = None,
|
||||
callback: Any = None, # noqa: ARG002 - maintained for interface compatibility
|
||||
) -> Generator[list[SlimDocument], None, None]:
|
||||
"""Return lightweight references to Jira issues (used for permission syncing)."""
|
||||
if not self.jira_client:
|
||||
raise ConnectorMissingCredentialError("Jira")
|
||||
|
||||
start_ts = start if start is not None else 0
|
||||
end_ts = end if end is not None else datetime.now(timezone.utc).timestamp()
|
||||
jql = self._build_jql(start_ts, end_ts)
|
||||
|
||||
checkpoint = self.build_dummy_checkpoint()
|
||||
checkpoint_callback = self._make_checkpoint_callback(checkpoint)
|
||||
prev_offset = 0
|
||||
current_offset = 0
|
||||
slim_batch: list[SlimDocument] = []
|
||||
|
||||
while checkpoint.has_more:
|
||||
for issue in self._perform_jql_search(
|
||||
jql=jql,
|
||||
start=current_offset,
|
||||
max_results=_JIRA_SLIM_PAGE_SIZE,
|
||||
fields=self._slim_fields,
|
||||
all_issue_ids=checkpoint.all_issue_ids,
|
||||
checkpoint_callback=checkpoint_callback,
|
||||
next_page_token=checkpoint.cursor,
|
||||
ids_done=checkpoint.ids_done,
|
||||
):
|
||||
current_offset += 1
|
||||
if should_skip_issue(issue, self.labels_to_skip):
|
||||
continue
|
||||
|
||||
doc_id = build_issue_url(self.jira_base_url, issue.key)
|
||||
slim_batch.append(SlimDocument(id=doc_id))
|
||||
|
||||
if len(slim_batch) >= _JIRA_SLIM_PAGE_SIZE:
|
||||
yield slim_batch
|
||||
slim_batch = []
|
||||
|
||||
self._update_checkpoint_for_next_run(
|
||||
checkpoint=checkpoint,
|
||||
current_offset=current_offset,
|
||||
starting_offset=prev_offset,
|
||||
page_size=_JIRA_SLIM_PAGE_SIZE,
|
||||
)
|
||||
prev_offset = current_offset
|
||||
|
||||
if slim_batch:
|
||||
yield slim_batch
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _build_jql(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> str:
|
||||
clauses: list[str] = []
|
||||
if self.jql_query:
|
||||
clauses.append(f"({self.jql_query})")
|
||||
elif self.project_key:
|
||||
clauses.append(f'project = "{self.project_key}"')
|
||||
else:
|
||||
raise ConnectorValidationError("Either project_key or jql_query must be provided for Jira connector.")
|
||||
|
||||
if self.labels_to_skip:
|
||||
labels = ", ".join(f'"{label}"' for label in self.labels_to_skip)
|
||||
clauses.append(f"labels NOT IN ({labels})")
|
||||
|
||||
if start is not None:
|
||||
clauses.append(f'updated >= "{self._format_jql_time(start)}"')
|
||||
if end is not None:
|
||||
clauses.append(f'updated <= "{self._format_jql_time(end)}"')
|
||||
|
||||
if not clauses:
|
||||
raise ConnectorValidationError("Unable to build Jira JQL query.")
|
||||
|
||||
jql = " AND ".join(clauses)
|
||||
if "order by" not in jql.lower():
|
||||
jql = f"{jql} ORDER BY updated ASC"
|
||||
return jql
|
||||
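For orientation, _build_jql above produces a JQL string of the following shape; the project key, label, and timestamps below are made-up example values (times are minute-precision in the connector's timezone):

# project_key="ENG", labels_to_skip={"ignore"}, start/end given as epoch seconds
# -> 'project = "ENG" AND labels NOT IN ("ignore") AND updated >= "2024-01-01 08:00" AND updated <= "2024-01-02 08:00" ORDER BY updated ASC'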
|
||||
def _format_jql_time(self, timestamp: SecondsSinceUnixEpoch) -> str:
|
||||
dt_utc = datetime.fromtimestamp(float(timestamp), tz=timezone.utc)
|
||||
dt_local = dt_utc.astimezone(self.timezone)
|
||||
# Jira only accepts minute-precision timestamps in JQL, so we format accordingly
|
||||
# and rely on a post-query second-level filter to avoid duplicates.
|
||||
return dt_local.strftime("%Y-%m-%d %H:%M")
|
||||
|
||||
def _issue_to_document(self, issue: Issue) -> Document | None:
|
||||
fields = issue.raw.get("fields", {})
|
||||
summary = fields.get("summary") or ""
|
||||
description_text = extract_body_text(fields.get("description"))
|
||||
comments_text = (
|
||||
format_comments(
|
||||
fields.get("comment"),
|
||||
blacklist=self.comment_email_blacklist,
|
||||
)
|
||||
if self.include_comments
|
||||
else ""
|
||||
)
|
||||
attachments_text = format_attachments(fields.get("attachment"))
|
||||
|
||||
reporter_name, reporter_email = extract_user(fields.get("reporter"))
|
||||
assignee_name, assignee_email = extract_user(fields.get("assignee"))
|
||||
status = extract_named_value(fields.get("status"))
|
||||
priority = extract_named_value(fields.get("priority"))
|
||||
issue_type = extract_named_value(fields.get("issuetype"))
|
||||
project = fields.get("project") or {}
|
||||
|
||||
issue_url = build_issue_url(self.jira_base_url, issue.key)
|
||||
|
||||
metadata_lines = [
|
||||
f"key: {issue.key}",
|
||||
f"url: {issue_url}",
|
||||
f"summary: {summary}",
|
||||
f"status: {status or 'Unknown'}",
|
||||
f"priority: {priority or 'Unspecified'}",
|
||||
f"issue_type: {issue_type or 'Unknown'}",
|
||||
f"project: {project.get('name') or ''}",
|
||||
f"project_key: {project.get('key') or self.project_key or ''}",
|
||||
]
|
||||
|
||||
if reporter_name:
|
||||
metadata_lines.append(f"reporter: {reporter_name}")
|
||||
if reporter_email:
|
||||
metadata_lines.append(f"reporter_email: {reporter_email}")
|
||||
if assignee_name:
|
||||
metadata_lines.append(f"assignee: {assignee_name}")
|
||||
if assignee_email:
|
||||
metadata_lines.append(f"assignee_email: {assignee_email}")
|
||||
if fields.get("labels"):
|
||||
metadata_lines.append(f"labels: {', '.join(fields.get('labels'))}")
|
||||
|
||||
created_dt = parse_jira_datetime(fields.get("created"))
|
||||
updated_dt = parse_jira_datetime(fields.get("updated")) or created_dt or datetime.now(timezone.utc)
|
||||
metadata_lines.append(f"created: {created_dt.isoformat() if created_dt else ''}")
|
||||
metadata_lines.append(f"updated: {updated_dt.isoformat() if updated_dt else ''}")
|
||||
|
||||
sections: list[str] = [
|
||||
"---",
|
||||
"\n".join(filter(None, metadata_lines)),
|
||||
"---",
|
||||
"",
|
||||
"## Description",
|
||||
description_text or "No description provided.",
|
||||
]
|
||||
|
||||
if comments_text:
|
||||
sections.extend(["", "## Comments", comments_text])
|
||||
if attachments_text:
|
||||
sections.extend(["", "## Attachments", attachments_text])
|
||||
|
||||
blob_text = "\n".join(sections).strip() + "\n"
|
||||
blob = blob_text.encode("utf-8")
|
||||
|
||||
if len(blob) > self.max_ticket_size:
|
||||
logger.info(f"[Jira] Skipping {issue.key} because it exceeds the maximum size of {self.max_ticket_size} bytes.")
|
||||
return None
|
||||
|
||||
semantic_identifier = f"{issue.key}: {summary}" if summary else issue.key
|
||||
|
||||
return Document(
|
||||
id=issue_url,
|
||||
source=DocumentSource.JIRA,
|
||||
semantic_identifier=semantic_identifier,
|
||||
extension=".md",
|
||||
blob=blob,
|
||||
doc_updated_at=updated_dt,
|
||||
size_bytes=len(blob),
|
||||
)
|
||||
|
||||
def _attachment_documents(self, issue: Issue) -> Iterable[Document]:
|
||||
attachments = issue.raw.get("fields", {}).get("attachment") or []
|
||||
for attachment in attachments:
|
||||
try:
|
||||
document = self._attachment_to_document(issue, attachment)
|
||||
if document is not None:
|
||||
yield document
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
failed_id = attachment.get("id") or attachment.get("filename")
|
||||
issue_key = getattr(issue, "key", "unknown")
|
||||
logger.warning(f"[Jira] Failed to process attachment {failed_id} for issue {issue_key}: {exc}")
|
||||
|
||||
def _attachment_to_document(self, issue: Issue, attachment: dict[str, Any]) -> Document | None:
|
||||
if not self.include_attachments:
|
||||
return None
|
||||
|
||||
filename = attachment.get("filename")
|
||||
content_url = attachment.get("content")
|
||||
if not filename or not content_url:
|
||||
return None
|
||||
|
||||
try:
|
||||
attachment_size = int(attachment.get("size", 0))
|
||||
except (TypeError, ValueError):
|
||||
attachment_size = 0
|
||||
if attachment_size and attachment_size > self.attachment_size_limit:
|
||||
logger.info(f"[Jira] Skipping attachment {filename} on {issue.key} because reported size exceeds limit ({self.attachment_size_limit} bytes).")
|
||||
return None
|
||||
|
||||
blob = self._download_attachment(content_url)
|
||||
if blob is None:
|
||||
return None
|
||||
|
||||
if len(blob) > self.attachment_size_limit:
|
||||
logger.info(f"[Jira] Skipping attachment {filename} on {issue.key} because it exceeds the size limit ({self.attachment_size_limit} bytes).")
|
||||
return None
|
||||
|
||||
attachment_time = parse_jira_datetime(attachment.get("created")) or parse_jira_datetime(attachment.get("updated"))
|
||||
updated_dt = attachment_time or parse_jira_datetime(issue.raw.get("fields", {}).get("updated")) or datetime.now(timezone.utc)
|
||||
|
||||
extension = os.path.splitext(filename)[1] or ""
|
||||
document_id = f"{issue.key}::attachment::{attachment.get('id') or filename}"
|
||||
semantic_identifier = f"{issue.key} attachment: {filename}"
|
||||
|
||||
return Document(
|
||||
id=document_id,
|
||||
source=DocumentSource.JIRA,
|
||||
semantic_identifier=semantic_identifier,
|
||||
extension=extension,
|
||||
blob=blob,
|
||||
doc_updated_at=updated_dt,
|
||||
size_bytes=len(blob),
|
||||
)
|
||||
|
||||
def _download_attachment(self, url: str) -> bytes | None:
|
||||
if not self.jira_client:
|
||||
raise ConnectorMissingCredentialError("Jira")
|
||||
response = self.jira_client._session.get(url)
|
||||
response.raise_for_status()
|
||||
return response.content
|
||||
|
||||
def _sync_timezone_from_server(self) -> None:
|
||||
if self._timezone_overridden or not self.jira_client:
|
||||
return
|
||||
try:
|
||||
server_info = self.jira_client.server_info()
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.info(f"[Jira] Unable to determine timezone from server info; continuing with offset {self.timezone_offset}. Error: {exc}")
|
||||
return
|
||||
|
||||
detected_offset = self._extract_timezone_offset(server_info)
|
||||
if detected_offset is None or detected_offset == self.timezone_offset:
|
||||
return
|
||||
|
||||
self.timezone_offset = detected_offset
|
||||
self.timezone = timezone(offset=timedelta(hours=detected_offset))
|
||||
logger.info(f"[Jira] Timezone offset adjusted to {detected_offset} hours using Jira server info.")
|
||||
|
||||
def _extract_timezone_offset(self, server_info: dict[str, Any]) -> float | None:
|
||||
server_time_raw = server_info.get("serverTime")
|
||||
if isinstance(server_time_raw, str):
|
||||
offset = self._parse_offset_from_datetime_string(server_time_raw)
|
||||
if offset is not None:
|
||||
return offset
|
||||
|
||||
tz_name = server_info.get("timeZone")
|
||||
if isinstance(tz_name, str):
|
||||
offset = self._offset_from_zone_name(tz_name)
|
||||
if offset is not None:
|
||||
return offset
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _parse_offset_from_datetime_string(value: str) -> float | None:
|
||||
normalized = JiraConnector._normalize_datetime_string(value)
|
||||
try:
|
||||
dt = datetime.fromisoformat(normalized)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
if dt.tzinfo is None:
|
||||
return 0.0
|
||||
|
||||
offset = dt.tzinfo.utcoffset(dt)
|
||||
if offset is None:
|
||||
return None
|
||||
return offset.total_seconds() / 3600.0
|
||||
|
||||
@staticmethod
|
||||
def _normalize_datetime_string(value: str) -> str:
|
||||
trimmed = (value or "").strip()
|
||||
if trimmed.endswith("Z"):
|
||||
return f"{trimmed[:-1]}+00:00"
|
||||
|
||||
match = _TZ_OFFSET_PATTERN.search(trimmed)
|
||||
if match and match.group(3) != ":":
|
||||
sign, hours, _, minutes = match.groups()
|
||||
trimmed = f"{trimmed[: match.start()]}{sign}{hours}:{minutes}"
|
||||
return trimmed
|
||||
|
||||
@staticmethod
|
||||
def _offset_from_zone_name(name: str) -> float | None:
|
||||
try:
|
||||
tz = ZoneInfo(name)
|
||||
except (ZoneInfoNotFoundError, ValueError):
|
||||
return None
|
||||
reference = datetime.now(tz)
|
||||
offset = reference.utcoffset()
|
||||
if offset is None:
|
||||
return None
|
||||
return offset.total_seconds() / 3600.0
|
||||
|
||||
def _is_cloud_client(self) -> bool:
|
||||
if not self.jira_client:
|
||||
return False
|
||||
rest_version = str(self.jira_client._options.get("rest_api_version", "")).strip()
|
||||
return rest_version == str(JIRA_CLOUD_API_VERSION)
|
||||
|
||||
def _full_page_size(self) -> int:
|
||||
return max(1, min(self.batch_size, _JIRA_FULL_PAGE_SIZE))
|
||||
|
||||
def _perform_jql_search(
|
||||
self,
|
||||
*,
|
||||
jql: str,
|
||||
start: int,
|
||||
max_results: int,
|
||||
fields: str | None = None,
|
||||
all_issue_ids: list[list[str]] | None = None,
|
||||
checkpoint_callback: Callable[[Iterable[list[str]], str | None], None] | None = None,
|
||||
next_page_token: str | None = None,
|
||||
ids_done: bool = False,
|
||||
) -> Iterable[Issue]:
|
||||
assert self.jira_client, "Jira client not initialized."
|
||||
is_cloud = self._is_cloud_client()
|
||||
if is_cloud:
|
||||
if all_issue_ids is None:
|
||||
raise ValueError("all_issue_ids is required for Jira Cloud searches.")
|
||||
yield from self._perform_jql_search_v3(
|
||||
jql=jql,
|
||||
max_results=max_results,
|
||||
fields=fields,
|
||||
all_issue_ids=all_issue_ids,
|
||||
checkpoint_callback=checkpoint_callback,
|
||||
next_page_token=next_page_token,
|
||||
ids_done=ids_done,
|
||||
)
|
||||
else:
|
||||
yield from self._perform_jql_search_v2(
|
||||
jql=jql,
|
||||
start=start,
|
||||
max_results=max_results,
|
||||
fields=fields,
|
||||
)
|
||||
|
||||
def _perform_jql_search_v3(
|
||||
self,
|
||||
*,
|
||||
jql: str,
|
||||
max_results: int,
|
||||
all_issue_ids: list[list[str]],
|
||||
fields: str | None = None,
|
||||
checkpoint_callback: Callable[[Iterable[list[str]], str | None], None] | None = None,
|
||||
next_page_token: str | None = None,
|
||||
ids_done: bool = False,
|
||||
) -> Iterable[Issue]:
|
||||
assert self.jira_client, "Jira client not initialized."
|
||||
|
||||
if not ids_done:
|
||||
new_ids, page_token = self._enhanced_search_ids(jql, next_page_token)
|
||||
if checkpoint_callback is not None and new_ids:
|
||||
checkpoint_callback(
|
||||
self._chunk_issue_ids(new_ids, max_results),
|
||||
page_token,
|
||||
)
|
||||
elif checkpoint_callback is not None:
|
||||
checkpoint_callback([], page_token)
|
||||
|
||||
if all_issue_ids:
|
||||
issue_ids = all_issue_ids.pop()
|
||||
if issue_ids:
|
||||
yield from self._bulk_fetch_issues(issue_ids, fields)
|
||||
|
||||
def _perform_jql_search_v2(
|
||||
self,
|
||||
*,
|
||||
jql: str,
|
||||
start: int,
|
||||
max_results: int,
|
||||
fields: str | None = None,
|
||||
) -> Iterable[Issue]:
|
||||
assert self.jira_client, "Jira client not initialized."
|
||||
|
||||
issues = self.jira_client.search_issues(
|
||||
jql_str=jql,
|
||||
startAt=start,
|
||||
maxResults=max_results,
|
||||
fields=fields or self._fields_param,
|
||||
expand="renderedFields",
|
||||
)
|
||||
for issue in issues:
|
||||
yield issue
|
||||
|
||||
def _enhanced_search_ids(
|
||||
self,
|
||||
jql: str,
|
||||
next_page_token: str | None,
|
||||
) -> tuple[list[str], str | None]:
|
||||
assert self.jira_client, "Jira client not initialized."
|
||||
enhanced_search_path = self.jira_client._get_url("search/jql")
|
||||
params: dict[str, str | int | None] = {
|
||||
"jql": jql,
|
||||
"maxResults": _MAX_RESULTS_FETCH_IDS,
|
||||
"nextPageToken": next_page_token,
|
||||
"fields": "id",
|
||||
}
|
||||
response = self.jira_client._session.get(enhanced_search_path, params=params)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return [str(issue["id"]) for issue in data.get("issues", [])], data.get("nextPageToken")
|
||||
|
||||
def _bulk_fetch_issues(
|
||||
self,
|
||||
issue_ids: list[str],
|
||||
fields: str | None,
|
||||
) -> Iterable[Issue]:
|
||||
assert self.jira_client, "Jira client not initialized."
|
||||
if not issue_ids:
|
||||
return []
|
||||
|
||||
bulk_fetch_path = self.jira_client._get_url("issue/bulkfetch")
|
||||
payload: dict[str, Any] = {"issueIdsOrKeys": issue_ids}
|
||||
payload["fields"] = fields.split(",") if fields else ["*all"]
|
||||
|
||||
response = self.jira_client._session.post(bulk_fetch_path, json=payload)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return [Issue(self.jira_client._options, self.jira_client._session, raw=issue) for issue in data.get("issues", [])]
|
||||
|
||||
@staticmethod
|
||||
def _chunk_issue_ids(issue_ids: list[str], chunk_size: int) -> Iterable[list[str]]:
|
||||
if chunk_size <= 0:
|
||||
chunk_size = _JIRA_FULL_PAGE_SIZE
|
||||
|
||||
for idx in range(0, len(issue_ids), chunk_size):
|
||||
yield issue_ids[idx : idx + chunk_size]
|
||||
|
||||
def _make_checkpoint_callback(self, checkpoint: JiraCheckpoint) -> Callable[[Iterable[list[str]], str | None], None]:
|
||||
def checkpoint_callback(
|
||||
issue_ids: Iterable[list[str]] | list[list[str]],
|
||||
page_token: str | None,
|
||||
) -> None:
|
||||
for id_batch in issue_ids:
|
||||
checkpoint.all_issue_ids.append(list(id_batch))
|
||||
checkpoint.cursor = page_token
|
||||
checkpoint.ids_done = page_token is None
|
||||
|
||||
return checkpoint_callback
|
||||
|
||||
def _update_checkpoint_for_next_run(
|
||||
self,
|
||||
*,
|
||||
checkpoint: JiraCheckpoint,
|
||||
current_offset: int,
|
||||
starting_offset: int,
|
||||
page_size: int,
|
||||
) -> None:
|
||||
if self._is_cloud_client():
|
||||
checkpoint.has_more = bool(checkpoint.all_issue_ids) or not checkpoint.ids_done
|
||||
else:
|
||||
checkpoint.has_more = current_offset - starting_offset == page_size
|
||||
checkpoint.cursor = None
|
||||
checkpoint.ids_done = True
|
||||
checkpoint.all_issue_ids = []
|
||||
|
||||
|
||||
def iterate_jira_documents(
|
||||
connector: "JiraConnector",
|
||||
start: SecondsSinceUnixEpoch,
|
||||
end: SecondsSinceUnixEpoch,
|
||||
iteration_limit: int = 100_000,
|
||||
) -> Iterator[Document]:
|
||||
"""Yield documents without materializing the entire result set."""
|
||||
|
||||
checkpoint = connector.build_dummy_checkpoint()
|
||||
iterations = 0
|
||||
|
||||
while checkpoint.has_more:
|
||||
wrapper = CheckpointOutputWrapper[JiraCheckpoint]()
|
||||
generator = wrapper(connector.load_from_checkpoint(start=start, end=end, checkpoint=checkpoint))
|
||||
|
||||
for document, failure, next_checkpoint in generator:
|
||||
if failure is not None:
|
||||
failure_message = getattr(failure, "failure_message", str(failure))
|
||||
raise RuntimeError(f"Failed to load Jira documents: {failure_message}")
|
||||
if document is not None:
|
||||
yield document
|
||||
if next_checkpoint is not None:
|
||||
checkpoint = next_checkpoint
|
||||
|
||||
iterations += 1
|
||||
if iterations > iteration_limit:
|
||||
raise RuntimeError("Too many iterations while loading Jira documents.")
|
||||
|
||||
|
||||
def test_jira(
|
||||
*,
|
||||
base_url: str,
|
||||
project_key: str | None = None,
|
||||
jql_query: str | None = None,
|
||||
credentials: dict[str, Any],
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
start_ts: float | None = None,
|
||||
end_ts: float | None = None,
|
||||
connector_options: dict[str, Any] | None = None,
|
||||
) -> list[Document]:
|
||||
"""Programmatic entry point that mirrors the CLI workflow."""
|
||||
|
||||
connector_kwargs = connector_options.copy() if connector_options else {}
|
||||
connector = JiraConnector(
|
||||
jira_base_url=base_url,
|
||||
project_key=project_key,
|
||||
jql_query=jql_query,
|
||||
batch_size=batch_size,
|
||||
**connector_kwargs,
|
||||
)
|
||||
connector.load_credentials(credentials)
|
||||
connector.validate_connector_settings()
|
||||
|
||||
now_ts = datetime.now(timezone.utc).timestamp()
|
||||
start = start_ts if start_ts is not None else 0.0
|
||||
end = end_ts if end_ts is not None else now_ts
|
||||
|
||||
documents = list(iterate_jira_documents(connector, start=start, end=end))
|
||||
logger.info(f"[Jira] Fetched {len(documents)} Jira documents.")
|
||||
for doc in documents[:5]:
|
||||
logger.info(f"[Jira] Document {doc.semantic_identifier} ({doc.id}) size={doc.size_bytes} bytes")
|
||||
return documents
|
||||
|
||||
|
||||
def _build_arg_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description="Fetch Jira issues and print summary statistics.")
|
||||
parser.add_argument("--base-url", dest="base_url", default=os.environ.get("JIRA_BASE_URL"))
|
||||
parser.add_argument("--project", dest="project_key", default=os.environ.get("JIRA_PROJECT_KEY"))
|
||||
parser.add_argument("--jql", dest="jql_query", default=os.environ.get("JIRA_JQL"))
|
||||
parser.add_argument("--email", dest="user_email", default=os.environ.get("JIRA_USER_EMAIL"))
|
||||
parser.add_argument("--token", dest="api_token", default=os.environ.get("JIRA_API_TOKEN"))
|
||||
parser.add_argument("--password", dest="password", default=os.environ.get("JIRA_PASSWORD"))
|
||||
parser.add_argument("--batch-size", dest="batch_size", type=int, default=int(os.environ.get("JIRA_BATCH_SIZE", INDEX_BATCH_SIZE)))
|
||||
parser.add_argument("--include_comments", dest="include_comments", type=bool, default=True)
|
||||
parser.add_argument("--include_attachments", dest="include_attachments", type=bool, default=True)
|
||||
parser.add_argument("--attachment_size_limit", dest="attachment_size_limit", type=float, default=_DEFAULT_ATTACHMENT_SIZE_LIMIT)
|
||||
parser.add_argument("--start-ts", dest="start_ts", type=float, default=None, help="Epoch seconds inclusive lower bound for updated issues.")
|
||||
parser.add_argument("--end-ts", dest="end_ts", type=float, default=9999999999, help="Epoch seconds inclusive upper bound for updated issues.")
|
||||
return parser
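An example invocation using the flags defined above (the script name and credential values are placeholders, not part of the diff):
# python jira_connector_check.py --base-url https://acme.atlassian.net \
#     --email me@example.com --token <api-token> --project ENG --batch-size 50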
|
||||
|
||||
|
||||
def main(config: dict[str, Any] | None = None) -> None:
|
||||
if config is None:
|
||||
args = _build_arg_parser().parse_args()
|
||||
config = {
|
||||
"base_url": args.base_url,
|
||||
"project_key": args.project_key,
|
||||
"jql_query": args.jql_query,
|
||||
"batch_size": args.batch_size,
|
||||
"start_ts": args.start_ts,
|
||||
"end_ts": args.end_ts,
|
||||
"include_comments": args.include_comments,
|
||||
"include_attachments": args.include_attachments,
|
||||
"attachment_size_limit": args.attachment_size_limit,
|
||||
"credentials": {
|
||||
"jira_user_email": args.user_email,
|
||||
"jira_api_token": args.api_token,
|
||||
"jira_password": args.password,
|
||||
},
|
||||
}
|
||||
|
||||
base_url = config.get("base_url")
|
||||
credentials = config.get("credentials", {})
|
||||
|
||||
print(f"[Jira] {config=}", flush=True)
|
||||
print(f"[Jira] {credentials=}", flush=True)
|
||||
|
||||
if not base_url:
|
||||
raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
|
||||
if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):
|
||||
raise RuntimeError("Provide either an API token or both email/password for Jira authentication.")
|
||||
|
||||
connector_options = {
|
||||
key: value
|
||||
for key, value in (
|
||||
("include_comments", config.get("include_comments")),
|
||||
("include_attachments", config.get("include_attachments")),
|
||||
("attachment_size_limit", config.get("attachment_size_limit")),
|
||||
("labels_to_skip", config.get("labels_to_skip")),
|
||||
("comment_email_blacklist", config.get("comment_email_blacklist")),
|
||||
("scoped_token", config.get("scoped_token")),
|
||||
("timezone_offset", config.get("timezone_offset")),
|
||||
)
|
||||
if value is not None
|
||||
}
|
||||
|
||||
documents = test_jira(
|
||||
base_url=base_url,
|
||||
project_key=config.get("project_key"),
|
||||
jql_query=config.get("jql_query"),
|
||||
credentials=credentials,
|
||||
batch_size=config.get("batch_size", INDEX_BATCH_SIZE),
|
||||
start_ts=config.get("start_ts"),
|
||||
end_ts=config.get("end_ts"),
|
||||
connector_options=connector_options,
|
||||
)
|
||||
|
||||
preview_count = min(len(documents), 5)
|
||||
for idx in range(preview_count):
|
||||
doc = documents[idx]
|
||||
print(f"[Jira] [Sample {idx + 1}] {doc.semantic_identifier} | id={doc.id} | size={doc.size_bytes} bytes")
|
||||
|
||||
print(f"[Jira] Jira connector test completed. Documents fetched: {len(documents)}")
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover - manual execution path
|
||||
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(name)s %(message)s")
|
||||
main()
|
||||
149
common/data_source/jira/utils.py
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
"""Helper utilities for the Jira connector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from collections.abc import Collection
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Iterable
|
||||
|
||||
from jira.resources import Issue
|
||||
|
||||
from common.data_source.utils import datetime_from_string
|
||||
|
||||
JIRA_SERVER_API_VERSION = os.environ.get("JIRA_SERVER_API_VERSION", "2")
|
||||
JIRA_CLOUD_API_VERSION = os.environ.get("JIRA_CLOUD_API_VERSION", "3")
|
||||
|
||||
|
||||
def build_issue_url(base_url: str, issue_key: str) -> str:
|
||||
"""Return the canonical UI URL for a Jira issue."""
|
||||
return f"{base_url.rstrip('/')}/browse/{issue_key}"
|
||||
|
||||
|
||||
def parse_jira_datetime(value: Any) -> datetime | None:
|
||||
"""Best-effort parse of Jira datetime values to aware UTC datetimes."""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
return value.astimezone(timezone.utc) if value.tzinfo else value.replace(tzinfo=timezone.utc)
|
||||
if isinstance(value, str):
|
||||
return datetime_from_string(value)
|
||||
return None
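For example (illustrative), a naive datetime is coerced to UTC while None falls through:
# parse_jira_datetime(None) -> None
# parse_jira_datetime(datetime(2024, 5, 1, 10, 0))
# -> datetime(2024, 5, 1, 10, 0, tzinfo=timezone.utc)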
|
||||
|
||||
|
||||
def extract_named_value(value: Any) -> str | None:
|
||||
"""Extract a readable string out of Jira's typed objects."""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
if isinstance(value, dict):
|
||||
return value.get("name") or value.get("value")
|
||||
return getattr(value, "name", None)
|
||||
|
||||
|
||||
def extract_user(value: Any) -> tuple[str | None, str | None]:
|
||||
"""Return display name + email tuple for a Jira user blob."""
|
||||
if value is None:
|
||||
return None, None
|
||||
if isinstance(value, dict):
|
||||
return value.get("displayName"), value.get("emailAddress")
|
||||
|
||||
display = getattr(value, "displayName", None)
|
||||
email = getattr(value, "emailAddress", None)
|
||||
return display, email
|
||||
|
||||
|
||||
def extract_text_from_adf(adf: Any) -> str:
|
||||
"""Flatten Atlassian Document Format (ADF) structures to text."""
|
||||
texts: list[str] = []
|
||||
|
||||
def _walk(node: Any) -> None:
|
||||
if node is None:
|
||||
return
|
||||
if isinstance(node, dict):
|
||||
node_type = node.get("type")
|
||||
if node_type == "text":
|
||||
texts.append(node.get("text", ""))
|
||||
for child in node.get("content", []):
|
||||
_walk(child)
|
||||
elif isinstance(node, list):
|
||||
for child in node:
|
||||
_walk(child)
|
||||
|
||||
_walk(adf)
|
||||
return "\n".join(part for part in texts if part)
|
||||
|
||||
|
||||
def extract_body_text(value: Any) -> str:
|
||||
"""Normalize Jira description/comments (raw/adf/str) into plain text."""
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, str):
|
||||
return value.strip()
|
||||
if isinstance(value, dict):
|
||||
return extract_text_from_adf(value).strip()
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
def format_comments(
|
||||
comment_block: Any,
|
||||
*,
|
||||
blacklist: Collection[str],
|
||||
) -> str:
|
||||
"""Convert Jira comments into a markdown-ish bullet list."""
|
||||
if not isinstance(comment_block, dict):
|
||||
return ""
|
||||
|
||||
comments = comment_block.get("comments") or []
|
||||
lines: list[str] = []
|
||||
normalized_blacklist = {email.lower() for email in blacklist if email}
|
||||
|
||||
for comment in comments:
|
||||
author = comment.get("author") or {}
|
||||
author_email = (author.get("emailAddress") or "").lower()
|
||||
if author_email and author_email in normalized_blacklist:
|
||||
continue
|
||||
|
||||
author_name = author.get("displayName") or author.get("name") or author_email or "Unknown"
|
||||
created = parse_jira_datetime(comment.get("created"))
|
||||
created_str = created.isoformat() if created else "Unknown time"
|
||||
body = extract_body_text(comment.get("body"))
|
||||
if not body:
|
||||
continue
|
||||
|
||||
lines.append(f"- {author_name} ({created_str}):\n{body}")
|
||||
|
||||
return "\n\n".join(lines)
|
||||
|
||||
|
||||
def format_attachments(attachments: Any) -> str:
|
||||
"""List Jira attachments as bullet points."""
|
||||
if not isinstance(attachments, list):
|
||||
return ""
|
||||
|
||||
attachment_lines: list[str] = []
|
||||
for attachment in attachments:
|
||||
filename = attachment.get("filename")
|
||||
if not filename:
|
||||
continue
|
||||
size = attachment.get("size")
|
||||
size_text = f" ({size} bytes)" if isinstance(size, int) else ""
|
||||
content_url = attachment.get("content") or ""
|
||||
url_suffix = f" -> {content_url}" if content_url else ""
|
||||
attachment_lines.append(f"- {filename}{size_text}{url_suffix}")
|
||||
|
||||
return "\n".join(attachment_lines)
|
||||
|
||||
|
||||
def should_skip_issue(issue: Issue, labels_to_skip: set[str]) -> bool:
|
||||
"""Return True if the issue contains any label from the skip list."""
|
||||
if not labels_to_skip:
|
||||
return False
|
||||
|
||||
fields = getattr(issue, "raw", {}).get("fields", {})
|
||||
labels: Iterable[str] = fields.get("labels") or []
|
||||
for label in labels:
|
||||
if (label or "").lower() in labels_to_skip:
|
||||
return True
|
||||
return False
|
||||
|
|
@ -1,112 +0,0 @@
|
|||
"""Jira connector"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from jira import JIRA
|
||||
|
||||
from common.data_source.config import INDEX_BATCH_SIZE
|
||||
from common.data_source.exceptions import (
|
||||
ConnectorValidationError,
|
||||
InsufficientPermissionsError,
|
||||
UnexpectedValidationError, ConnectorMissingCredentialError
|
||||
)
|
||||
from common.data_source.interfaces import (
|
||||
CheckpointedConnectorWithPermSync,
|
||||
SecondsSinceUnixEpoch,
|
||||
SlimConnectorWithPermSync
|
||||
)
|
||||
from common.data_source.models import (
|
||||
ConnectorCheckpoint
|
||||
)
|
||||
|
||||
|
||||
class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
|
||||
"""Jira connector for accessing Jira issues and projects"""
|
||||
|
||||
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
|
||||
self.batch_size = batch_size
|
||||
self.jira_client: JIRA | None = None
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
"""Load Jira credentials"""
|
||||
try:
|
||||
url = credentials.get("url")
|
||||
username = credentials.get("username")
|
||||
password = credentials.get("password")
|
||||
token = credentials.get("token")
|
||||
|
||||
if not url:
|
||||
raise ConnectorMissingCredentialError("Jira URL is required")
|
||||
|
||||
if token:
|
||||
# API token authentication
|
||||
self.jira_client = JIRA(server=url, token_auth=token)
|
||||
elif username and password:
|
||||
# Basic authentication
|
||||
self.jira_client = JIRA(server=url, basic_auth=(username, password))
|
||||
else:
|
||||
raise ConnectorMissingCredentialError("Jira credentials are incomplete")
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
raise ConnectorMissingCredentialError(f"Jira: {e}")
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
"""Validate Jira connector settings"""
|
||||
if not self.jira_client:
|
||||
raise ConnectorMissingCredentialError("Jira")
|
||||
|
||||
try:
|
||||
# Test connection by getting server info
|
||||
self.jira_client.server_info()
|
||||
except Exception as e:
|
||||
if "401" in str(e) or "403" in str(e):
|
||||
raise InsufficientPermissionsError("Invalid credentials or insufficient permissions")
|
||||
elif "404" in str(e):
|
||||
raise ConnectorValidationError("Jira instance not found")
|
||||
else:
|
||||
raise UnexpectedValidationError(f"Jira validation error: {e}")
|
||||
|
||||
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any:
|
||||
"""Poll Jira for recent issues"""
|
||||
# Simplified implementation - in production this would handle actual polling
|
||||
return []
|
||||
|
||||
def load_from_checkpoint(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch,
|
||||
end: SecondsSinceUnixEpoch,
|
||||
checkpoint: ConnectorCheckpoint,
|
||||
) -> Any:
|
||||
"""Load documents from checkpoint"""
|
||||
# Simplified implementation
|
||||
return []
|
||||
|
||||
def load_from_checkpoint_with_perm_sync(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch,
|
||||
end: SecondsSinceUnixEpoch,
|
||||
checkpoint: ConnectorCheckpoint,
|
||||
) -> Any:
|
||||
"""Load documents from checkpoint with permission sync"""
|
||||
# Simplified implementation
|
||||
return []
|
||||
|
||||
def build_dummy_checkpoint(self) -> ConnectorCheckpoint:
|
||||
"""Build dummy checkpoint"""
|
||||
return ConnectorCheckpoint()
|
||||
|
||||
def validate_checkpoint_json(self, checkpoint_json: str) -> ConnectorCheckpoint:
|
||||
"""Validate checkpoint JSON"""
|
||||
# Simplified implementation
|
||||
return ConnectorCheckpoint()
|
||||
|
||||
def retrieve_all_slim_docs_perm_sync(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch | None = None,
|
||||
end: SecondsSinceUnixEpoch | None = None,
|
||||
callback: Any = None,
|
||||
) -> Any:
|
||||
"""Retrieve all simplified documents with permission sync"""
|
||||
# Simplified implementation
|
||||
return []
|
||||
|
|
@ -48,17 +48,35 @@ from common.data_source.exceptions import RateLimitTriedTooManyTimesError
|
|||
from common.data_source.interfaces import CT, CheckpointedConnector, CheckpointOutputWrapper, ConfluenceUser, LoadFunction, OnyxExtensionType, SecondsSinceUnixEpoch, TokenResponse
|
||||
from common.data_source.models import BasicExpertInfo, Document
|
||||
|
||||
_TZ_SUFFIX_PATTERN = re.compile(r"([+-])([\d:]+)$")
|
||||
|
||||
|
||||
def datetime_from_string(datetime_string: str) -> datetime:
|
||||
datetime_string = datetime_string.strip()
|
||||
|
||||
match_jira_format = _TZ_SUFFIX_PATTERN.search(datetime_string)
|
||||
if match_jira_format:
|
||||
sign, tz_field = match_jira_format.groups()
|
||||
digits = tz_field.replace(":", "")
|
||||
|
||||
if digits.isdigit() and 1 <= len(digits) <= 4:
|
||||
if len(digits) >= 3:
|
||||
hours = digits[:-2].rjust(2, "0")
|
||||
minutes = digits[-2:]
|
||||
else:
|
||||
hours = digits.rjust(2, "0")
|
||||
minutes = "00"
|
||||
|
||||
normalized = f"{sign}{hours}:{minutes}"
|
||||
datetime_string = f"{datetime_string[: match_jira_format.start()]}{normalized}"
|
||||
|
||||
# Handle the case where the datetime string ends with 'Z' (Zulu time)
|
||||
if datetime_string.endswith('Z'):
|
||||
datetime_string = datetime_string[:-1] + '+00:00'
|
||||
if datetime_string.endswith("Z"):
|
||||
datetime_string = datetime_string[:-1] + "+00:00"
|
||||
|
||||
# Handle timezone format "+0000" -> "+00:00"
|
||||
if datetime_string.endswith('+0000'):
|
||||
datetime_string = datetime_string[:-5] + '+00:00'
|
||||
if datetime_string.endswith("+0000"):
|
||||
datetime_string = datetime_string[:-5] + "+00:00"
|
||||
|
||||
datetime_object = datetime.fromisoformat(datetime_string)
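Illustrative suffixes the added normalization is meant to accept before fromisoformat runs (each becomes an ISO-style ±HH:MM offset):
# "2024-05-01T10:00:00.000+0800" -> offset "+08:00"
# "2024-05-01T10:00:00+8"        -> offset "+08:00"
# "2024-05-01T10:00:00Z"         -> offset "+00:00" (via the Zulu branch above)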
|
||||
|
||||
|
|
@ -480,7 +498,7 @@ def get_file_ext(file_name: str) -> str:
|
|||
|
||||
|
||||
def is_accepted_file_ext(file_ext: str, extension_type: OnyxExtensionType) -> bool:
|
||||
image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
|
||||
image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}
|
||||
text_extensions = {".txt", ".md", ".mdx", ".conf", ".log", ".json", ".csv", ".tsv", ".xml", ".yml", ".yaml", ".sql"}
|
||||
document_extensions = {".pdf", ".docx", ".pptx", ".xlsx", ".eml", ".epub", ".html"}
|
||||
|
||||
|
|
@ -902,6 +920,18 @@ def load_all_docs_from_checkpoint_connector(
|
|||
)
|
||||
|
||||
|
||||
_ATLASSIAN_CLOUD_DOMAINS = (".atlassian.net", ".jira.com", ".jira-dev.com")
|
||||
|
||||
|
||||
def is_atlassian_cloud_url(url: str) -> bool:
|
||||
try:
|
||||
host = urlparse(url).hostname or ""
|
||||
except ValueError:
|
||||
return False
|
||||
host = host.lower()
|
||||
return any(host.endswith(domain) for domain in _ATLASSIAN_CLOUD_DOMAINS)
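A quick behavioral sketch (hostnames illustrative):
# is_atlassian_cloud_url("https://acme.atlassian.net")        -> True
# is_atlassian_cloud_url("https://jira.internal.example.com") -> False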
|
||||
|
||||
|
||||
def get_cloudId(base_url: str) -> str:
|
||||
tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
|
||||
response = requests.get(tenant_info_url, timeout=10)
|
||||
|
|
|
|||
|
|
@ -80,4 +80,4 @@ def log_exception(e, *args):
|
|||
raise Exception(a.text)
|
||||
else:
|
||||
logging.error(str(a))
|
||||
raise e
|
||||
raise e
|
||||
|
|
|
|||
|
|
@ -434,7 +434,7 @@ class MinerUParser(RAGFlowPdfParser):
|
|||
if not section.strip():
|
||||
section = "FAILED TO PARSE TABLE"
|
||||
case MinerUContentType.IMAGE:
|
||||
section = "".join(output["image_caption"]) + "\n" + "".join(output["image_footnote"])
|
||||
section = "".join(output.get(["image_caption"],[])) + "\n" + "".join(output.get(["image_footnote"],[]))
|
||||
case MinerUContentType.EQUATION:
|
||||
section = output["text"]
|
||||
case MinerUContentType.CODE:
|
||||
|
|
|
|||
|
|
@ -12,6 +12,10 @@ The RAGFlow Admin UI is a web-based interface that provides comprehensive system
|
|||
|
||||
To access the RAGFlow admin UI, append `/admin` to the web UI's address, e.g. `http://[RAGFLOW_WEB_UI_ADDR]/admin`, replacing `[RAGFLOW_WEB_UI_ADDR]` with the actual RAGFlow web UI address.
|
||||
|
||||
### Default Credentials
|
||||
| Username | Password |
|
||||
|----------|----------|
|
||||
| admin@ragflow.io | admin |
|
||||
|
||||
## Admin UI Overview
|
||||
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ dependencies = [
|
|||
"markdownify>=1.2.0",
|
||||
"captcha>=0.7.1",
|
||||
"pip>=25.2",
|
||||
"pypandoc>=1.16",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
|
|||
|
|
@ -20,33 +20,40 @@
|
|||
|
||||
|
||||
import copy
|
||||
import faulthandler
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import trio
|
||||
|
||||
from api.db.services.connector_service import ConnectorService, SyncLogsService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from common.log_utils import init_root_logger
|
||||
from common.config_utils import show_configs
|
||||
from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
import signal
|
||||
import trio
|
||||
import faulthandler
|
||||
from common.constants import FileSource, TaskStatus
|
||||
from common import settings
|
||||
from common.versions import get_ragflow_version
|
||||
from common.config_utils import show_configs
|
||||
from common.constants import FileSource, TaskStatus
|
||||
from common.data_source import (
|
||||
BlobStorageConnector,
|
||||
DiscordConnector,
|
||||
GoogleDriveConnector,
|
||||
JiraConnector,
|
||||
NotionConnector,
|
||||
)
|
||||
from common.data_source.config import INDEX_BATCH_SIZE
|
||||
from common.data_source.confluence_connector import ConfluenceConnector
|
||||
from common.data_source.interfaces import CheckpointOutputWrapper
|
||||
from common.data_source.utils import load_all_docs_from_checkpoint_connector
|
||||
from common.data_source.config import INDEX_BATCH_SIZE
|
||||
from common.log_utils import init_root_logger
|
||||
from common.signal_utils import start_tracemalloc_and_snapshot, stop_tracemalloc
|
||||
from common.versions import get_ragflow_version
|
||||
|
||||
MAX_CONCURRENT_TASKS = int(os.environ.get('MAX_CONCURRENT_TASKS', "5"))
|
||||
MAX_CONCURRENT_TASKS = int(os.environ.get("MAX_CONCURRENT_TASKS", "5"))
|
||||
task_limiter = trio.Semaphore(MAX_CONCURRENT_TASKS)
|
||||
|
||||
|
||||
|
|
@ -72,31 +79,32 @@ class SyncBase:
|
|||
min_update = min([doc.doc_updated_at for doc in document_batch])
|
||||
max_update = max([doc.doc_updated_at for doc in document_batch])
|
||||
next_update = max([next_update, max_update])
|
||||
docs = [{
|
||||
"id": doc.id,
|
||||
"connector_id": task["connector_id"],
|
||||
"source": self.SOURCE_NAME,
|
||||
"semantic_identifier": doc.semantic_identifier,
|
||||
"extension": doc.extension,
|
||||
"size_bytes": doc.size_bytes,
|
||||
"doc_updated_at": doc.doc_updated_at,
|
||||
"blob": doc.blob
|
||||
} for doc in document_batch]
|
||||
docs = [
|
||||
{
|
||||
"id": doc.id,
|
||||
"connector_id": task["connector_id"],
|
||||
"source": self.SOURCE_NAME,
|
||||
"semantic_identifier": doc.semantic_identifier,
|
||||
"extension": doc.extension,
|
||||
"size_bytes": doc.size_bytes,
|
||||
"doc_updated_at": doc.doc_updated_at,
|
||||
"blob": doc.blob,
|
||||
}
|
||||
for doc in document_batch
|
||||
]
|
||||
|
||||
e, kb = KnowledgebaseService.get_by_id(task["kb_id"])
|
||||
err, dids = SyncLogsService.duplicate_and_parse(kb, docs, task["tenant_id"], f"{self.SOURCE_NAME}/{task['connector_id']}", task["auto_parse"])
|
||||
SyncLogsService.increase_docs(task["id"], min_update, max_update, len(docs), "\n".join(err), len(err))
|
||||
doc_num += len(docs)
|
||||
|
||||
logging.info("{} docs synchronized till {}".format(doc_num, next_update))
|
||||
prefix = "[Jira] " if self.SOURCE_NAME == FileSource.JIRA else ""
|
||||
logging.info(f"{prefix}{doc_num} docs synchronized till {next_update}")
|
||||
SyncLogsService.done(task["id"], task["connector_id"])
|
||||
task["poll_range_start"] = next_update
|
||||
|
||||
except Exception as ex:
|
||||
msg = '\n'.join([
|
||||
''.join(traceback.format_exception_only(None, ex)).strip(),
|
||||
''.join(traceback.format_exception(None, ex, ex.__traceback__)).strip()
|
||||
])
|
||||
msg = "\n".join(["".join(traceback.format_exception_only(None, ex)).strip(), "".join(traceback.format_exception(None, ex, ex.__traceback__)).strip()])
|
||||
SyncLogsService.update_by_id(task["id"], {"status": TaskStatus.FAIL, "full_exception_trace": msg, "error_msg": str(ex)})
|
||||
|
||||
SyncLogsService.schedule(task["connector_id"], task["kb_id"], task["poll_range_start"])
|
||||
|
|
@ -109,21 +117,16 @@ class S3(SyncBase):
|
|||
SOURCE_NAME: str = FileSource.S3
|
||||
|
||||
async def _generate(self, task: dict):
|
||||
self.connector = BlobStorageConnector(
|
||||
bucket_type=self.conf.get("bucket_type", "s3"),
|
||||
bucket_name=self.conf["bucket_name"],
|
||||
prefix=self.conf.get("prefix", "")
|
||||
)
|
||||
self.connector = BlobStorageConnector(bucket_type=self.conf.get("bucket_type", "s3"), bucket_name=self.conf["bucket_name"], prefix=self.conf.get("prefix", ""))
|
||||
self.connector.load_credentials(self.conf["credentials"])
|
||||
document_batch_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \
|
||||
else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
|
||||
document_batch_generator = (
|
||||
self.connector.load_from_state()
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]
|
||||
else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
|
||||
)
|
||||
|
||||
begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
|
||||
logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"),
|
||||
self.conf["bucket_name"],
|
||||
self.conf.get("prefix", ""),
|
||||
begin_info
|
||||
))
|
||||
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
|
||||
logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"), self.conf["bucket_name"], self.conf.get("prefix", ""), begin_info))
|
||||
return document_batch_generator
|
||||
|
||||
|
||||
|
|
@ -131,8 +134,8 @@ class Confluence(SyncBase):
|
|||
SOURCE_NAME: str = FileSource.CONFLUENCE
|
||||
|
||||
async def _generate(self, task: dict):
|
||||
from common.data_source.interfaces import StaticCredentialsProvider
|
||||
from common.data_source.config import DocumentSource
|
||||
from common.data_source.interfaces import StaticCredentialsProvider
|
||||
|
||||
self.connector = ConfluenceConnector(
|
||||
wiki_base=self.conf["wiki_base"],
|
||||
|
|
@ -141,11 +144,7 @@ class Confluence(SyncBase):
|
|||
# page_id=self.conf.get("page_id", ""),
|
||||
)
|
||||
|
||||
credentials_provider = StaticCredentialsProvider(
|
||||
tenant_id=task["tenant_id"],
|
||||
connector_name=DocumentSource.CONFLUENCE,
|
||||
credential_json=self.conf["credentials"]
|
||||
)
|
||||
credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"], connector_name=DocumentSource.CONFLUENCE, credential_json=self.conf["credentials"])
|
||||
self.connector.set_credentials_provider(credentials_provider)
|
||||
|
||||
# Determine the time range for synchronization based on reindex or poll_range_start
|
||||
|
|
@ -174,10 +173,13 @@ class Notion(SyncBase):
|
|||
async def _generate(self, task: dict):
|
||||
self.connector = NotionConnector(root_page_id=self.conf["root_page_id"])
|
||||
self.connector.load_credentials(self.conf["credentials"])
|
||||
document_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \
|
||||
else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
|
||||
document_generator = (
|
||||
self.connector.load_from_state()
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]
|
||||
else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
|
||||
)
|
||||
|
||||
begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
|
||||
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
|
||||
logging.info("Connect to Notion: root({}) {}".format(self.conf["root_page_id"], begin_info))
|
||||
return document_generator
|
||||
|
||||
|
|
@ -194,13 +196,16 @@ class Discord(SyncBase):
|
|||
server_ids=server_ids.split(",") if server_ids else [],
|
||||
channel_names=channel_names.split(",") if channel_names else [],
|
||||
start_date=datetime(1970, 1, 1, tzinfo=timezone.utc).strftime("%Y-%m-%d"),
|
||||
batch_size=self.conf.get("batch_size", 1024)
|
||||
batch_size=self.conf.get("batch_size", 1024),
|
||||
)
|
||||
self.connector.load_credentials(self.conf["credentials"])
|
||||
document_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \
|
||||
else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
|
||||
document_generator = (
|
||||
self.connector.load_from_state()
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]
|
||||
else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
|
||||
)
|
||||
|
||||
begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
|
||||
begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
|
||||
logging.info("Connect to Discord: servers({}), channel({}) {}".format(server_ids, channel_names, begin_info))
|
||||
return document_generator
|
||||
|
||||
|
|
@ -285,7 +290,7 @@ class GoogleDrive(SyncBase):
|
|||
admin_email = self.connector.primary_admin_email
|
||||
except RuntimeError:
|
||||
admin_email = "unknown"
|
||||
logging.info("Connect to Google Drive as %s %s", admin_email, begin_info)
|
||||
logging.info(f"Connect to Google Drive as {admin_email} {begin_info}")
|
||||
return document_batches()
|
||||
|
||||
def _persist_rotated_credentials(self, connector_id: str, credentials: dict[str, Any]) -> None:
|
||||
|
|
@ -303,7 +308,93 @@ class Jira(SyncBase):
|
|||
SOURCE_NAME: str = FileSource.JIRA
|
||||
|
||||
async def _generate(self, task: dict):
|
||||
pass
|
||||
connector_kwargs = {
|
||||
"jira_base_url": self.conf["base_url"],
|
||||
"project_key": self.conf.get("project_key"),
|
||||
"jql_query": self.conf.get("jql_query"),
|
||||
"batch_size": self.conf.get("batch_size", INDEX_BATCH_SIZE),
|
||||
"include_comments": self.conf.get("include_comments", True),
|
||||
"include_attachments": self.conf.get("include_attachments", False),
|
||||
"labels_to_skip": self._normalize_list(self.conf.get("labels_to_skip")),
|
||||
"comment_email_blacklist": self._normalize_list(self.conf.get("comment_email_blacklist")),
|
||||
"scoped_token": self.conf.get("scoped_token", False),
|
||||
"attachment_size_limit": self.conf.get("attachment_size_limit"),
|
||||
"timezone_offset": self.conf.get("timezone_offset"),
|
||||
}
|
||||
|
||||
self.connector = JiraConnector(**connector_kwargs)
|
||||
|
||||
credentials = self.conf.get("credentials")
|
||||
if not credentials:
|
||||
raise ValueError("Jira connector is missing credentials.")
|
||||
|
||||
self.connector.load_credentials(credentials)
|
||||
self.connector.validate_connector_settings()
|
||||
|
||||
if task["reindex"] == "1" or not task["poll_range_start"]:
|
||||
start_time = 0.0
|
||||
begin_info = "totally"
|
||||
else:
|
||||
start_time = task["poll_range_start"].timestamp()
|
||||
begin_info = f"from {task['poll_range_start']}"
|
||||
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
|
||||
raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
|
||||
try:
|
||||
batch_size = int(raw_batch_size)
|
||||
except (TypeError, ValueError):
|
||||
batch_size = INDEX_BATCH_SIZE
|
||||
if batch_size <= 0:
|
||||
batch_size = INDEX_BATCH_SIZE
|
||||
|
||||
def document_batches():
|
||||
checkpoint = self.connector.build_dummy_checkpoint()
|
||||
pending_docs = []
|
||||
iterations = 0
|
||||
iteration_limit = 100_000
|
||||
|
||||
while checkpoint.has_more:
|
||||
wrapper = CheckpointOutputWrapper()
|
||||
generator = wrapper(
|
||||
self.connector.load_from_checkpoint(
|
||||
start_time,
|
||||
end_time,
|
||||
checkpoint,
|
||||
)
|
||||
)
|
||||
for document, failure, next_checkpoint in generator:
|
||||
if failure is not None:
|
||||
logging.warning(
|
||||
f"[Jira] Jira connector failure: {getattr(failure, 'failure_message', failure)}"
|
||||
)
|
||||
continue
|
||||
if document is not None:
|
||||
pending_docs.append(document)
|
||||
if len(pending_docs) >= batch_size:
|
||||
yield pending_docs
|
||||
pending_docs = []
|
||||
if next_checkpoint is not None:
|
||||
checkpoint = next_checkpoint
|
||||
|
||||
iterations += 1
|
||||
if iterations > iteration_limit:
|
||||
logging.error(f"[Jira] Task {task.get('id')} exceeded iteration limit ({iteration_limit}).")
|
||||
raise RuntimeError("Too many iterations while loading Jira documents.")
|
||||
|
||||
if pending_docs:
|
||||
yield pending_docs
|
||||
|
||||
logging.info(f"[Jira] Connect to Jira {connector_kwargs['jira_base_url']} {begin_info}")
|
||||
return document_batches()
|
||||
|
||||
@staticmethod
|
||||
def _normalize_list(values: Any) -> list[str] | None:
|
||||
if values is None:
|
||||
return None
|
||||
if isinstance(values, str):
|
||||
values = [item.strip() for item in values.split(",")]
|
||||
return [str(value).strip() for value in values if value is not None and str(value).strip()]
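Behavior sketch for the normalizer (values illustrative): comma-separated strings and lists both reduce to trimmed, non-empty entries.
# Jira._normalize_list("bug, wontfix , ") -> ["bug", "wontfix"]
# Jira._normalize_list(None)              -> None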
|
||||
|
||||
|
||||
class SharePoint(SyncBase):
|
||||
|
|
@ -337,9 +428,10 @@ func_factory = {
|
|||
FileSource.JIRA: Jira,
|
||||
FileSource.SHAREPOINT: SharePoint,
|
||||
FileSource.SLACK: Slack,
|
||||
FileSource.TEAMS: Teams
|
||||
FileSource.TEAMS: Teams,
|
||||
}
|
||||
|
||||
|
||||
async def dispatch_tasks():
|
||||
async with trio.open_nursery() as nursery:
|
||||
while True:
|
||||
|
|
@ -385,7 +477,7 @@ async def main():
|
|||
__/ |
|
||||
|___/
|
||||
""")
|
||||
logging.info(f'RAGFlow version: {get_ragflow_version()}')
|
||||
logging.info(f"RAGFlow version: {get_ragflow_version()}")
|
||||
show_configs()
|
||||
settings.init_settings()
|
||||
if sys.platform != "win32":
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ class RedisDB:
|
|||
info = self.REDIS.info()
|
||||
return {
|
||||
'redis_version': info["redis_version"],
|
||||
'server_mode': info["server_mode"],
|
||||
'server_mode': info["server_mode"] if "server_mode" in info else info.get("redis_mode", ""),
|
||||
'used_memory': info["used_memory_human"],
|
||||
'total_system_memory': info["total_system_memory_human"],
|
||||
'mem_fragmentation_ratio': info["mem_fragmentation_ratio"],
|
||||
|
|
|
|||
10
uv.lock
generated
|
|
@ -4923,6 +4923,14 @@ wheels = [
|
|||
{ url = "https://mirrors.aliyun.com/pypi/packages/80/28/2659c02301b9500751f8d42f9a6632e1508aa5120de5e43042b8b30f8d5d/pyopenssl-25.1.0-py3-none-any.whl", hash = "sha256:2b11f239acc47ac2e5aca04fd7fa829800aeee22a2eb30d744572a157bd8a1ab" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pypandoc"
|
||||
version = "1.16"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/77/af1fc54740a0712988f9518e629d38edc7b8ffccd7549203f19c3d8a2db6/pypandoc-1.16-py3-none-any.whl", hash = "sha256:868f390d48388743e7a5885915cbbaa005dea36a825ecdfd571f8c523416c822", size = 19425, upload-time = "2025-11-08T15:44:38.429Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.2.3"
|
||||
|
|
@ -5368,6 +5376,7 @@ dependencies = [
|
|||
{ name = "pyicu" },
|
||||
{ name = "pymysql" },
|
||||
{ name = "pyodbc" },
|
||||
{ name = "pypandoc" },
|
||||
{ name = "pypdf" },
|
||||
{ name = "pypdf2" },
|
||||
{ name = "python-calamine" },
|
||||
|
|
@ -5526,6 +5535,7 @@ requires-dist = [
|
|||
{ name = "pyicu", specifier = ">=2.15.3,<3.0.0" },
|
||||
{ name = "pymysql", specifier = ">=1.1.1,<2.0.0" },
|
||||
{ name = "pyodbc", specifier = ">=5.2.0,<6.0.0" },
|
||||
{ name = "pypandoc", specifier = ">=1.16" },
|
||||
{ name = "pypdf", specifier = "==6.0.0" },
|
||||
{ name = "pypdf2", specifier = ">=3.0.1,<4.0.0" },
|
||||
{ name = "python-calamine", specifier = ">=0.4.0" },
|
||||
|
|
|
|||
16
web/src/assets/svg/data-source/jira.svg
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="128" height="128" viewBox="0 0 128 128">
|
||||
<defs>
|
||||
<linearGradient id="jira-a" x1="88.136" y1="39.092" x2="68.472" y2="59.368" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0.176" stop-color="#0052cc"/>
|
||||
<stop offset="1" stop-color="#2684ff"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="jira-b" x1="66.564" y1="62.256" x2="43.828" y2="84.376" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0.176" stop-color="#0052cc"/>
|
||||
<stop offset="1" stop-color="#2684ff"/>
|
||||
</linearGradient>
|
||||
</defs>
|
||||
|
||||
<path d="M108.023 16H61.805c0 11.52 9.324 20.848 20.847 20.848h8.5v8.226c0 11.52 9.328 20.848 20.848 20.848V19.977A3.98 3.98 0 00108.023 16z" fill="#2684ff"/>
|
||||
<path d="M85.121 39.04H38.902c0 11.519 9.325 20.847 20.844 20.847h8.504v8.226c0 11.52 9.328 20.848 20.848 20.848V43.016a3.983 3.983 0 00-3.977-3.977z" fill="url(#jira-a)"/>
|
||||
<path d="M62.219 62.078H16c0 11.524 9.324 20.848 20.848 20.848h8.5v8.23c0 11.52 9.328 20.844 20.847 20.844V66.059a3.984 3.984 0 00-3.976-3.98z" fill="url(#jira-b)"/>
|
||||
</svg>
|
||||
|
|
|
@ -18,8 +18,10 @@ import { cn } from '@/lib/utils';
|
|||
import { AgentChatContext } from '@/pages/agent/context';
|
||||
import { WorkFlowTimeline } from '@/pages/agent/log-sheet/workflow-timeline';
|
||||
import { IMessage } from '@/pages/chat/interface';
|
||||
import { downloadFile } from '@/services/file-manager-service';
|
||||
import { downloadFileFromBlob } from '@/utils/file-util';
|
||||
import { isEmpty } from 'lodash';
|
||||
import { Atom, ChevronDown, ChevronUp } from 'lucide-react';
|
||||
import { Atom, ChevronDown, ChevronUp, Download } from 'lucide-react';
|
||||
import MarkdownContent from '../next-markdown-content';
|
||||
import { RAGFlowAvatar } from '../ragflow-avatar';
|
||||
import { useTheme } from '../theme-provider';
|
||||
|
|
@ -245,6 +247,32 @@ function MessageItem({
|
|||
{isUser && (
|
||||
<UploadedMessageFiles files={item.files}></UploadedMessageFiles>
|
||||
)}
|
||||
{isAssistant && item.attachment && item.attachment.doc_id && (
|
||||
<div className="w-full flex items-center justify-end">
|
||||
<Button
|
||||
variant="link"
|
||||
className="p-1 m-0 h-auto text-text-sub-title-invert"
|
||||
onClick={async () => {
|
||||
if (item.attachment?.doc_id) {
|
||||
try {
|
||||
const response = await downloadFile({
|
||||
docId: item.attachment.doc_id,
|
||||
ext: item.attachment.format,
|
||||
});
|
||||
const blob = new Blob([response.data], {
|
||||
type: response.data.type,
|
||||
});
|
||||
downloadFileFromBlob(blob, item.attachment.file_name);
|
||||
} catch (error) {
|
||||
console.error('Download failed:', error);
|
||||
}
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Download size={16} />
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</section>
|
||||
</div>
|
||||
</section>
|
||||
|
|
|
|||
|
|
@ -109,6 +109,7 @@ export enum Operator {
|
|||
SearXNG = 'SearXNG',
|
||||
Placeholder = 'Placeholder',
|
||||
DataOperations = 'DataOperations',
|
||||
ListOperations = 'ListOperations',
|
||||
VariableAssigner = 'VariableAssigner',
|
||||
VariableAggregator = 'VariableAggregator',
|
||||
File = 'File', // pipeline
|
||||
|
|
|
|||
|
|
@ -44,9 +44,14 @@ export interface IInputData {
|
|||
inputs: Record<string, BeginQuery>;
|
||||
tips: string;
|
||||
}
|
||||
|
||||
export interface IAttachment {
|
||||
doc_id: string;
|
||||
format: string;
|
||||
file_name: string;
|
||||
}
|
||||
export interface IMessageData {
|
||||
content: string;
|
||||
outputs: any;
|
||||
start_to_think?: boolean;
|
||||
end_to_think?: boolean;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import { MessageType } from '@/constants/chat';
|
||||
import { IAttachment } from '@/hooks/use-send-message';
|
||||
|
||||
export interface PromptConfig {
|
||||
empty_response: string;
|
||||
|
|
@ -97,6 +98,7 @@ export interface Message {
|
|||
data?: any;
|
||||
files?: File[];
|
||||
chatBoxId?: string;
|
||||
attachment?: IAttachment;
|
||||
}
|
||||
|
||||
export interface IReferenceChunk {
|
||||
|
|
@ -126,6 +128,7 @@ export interface IReferenceObject {
|
|||
|
||||
export interface IAnswer {
|
||||
answer: string;
|
||||
attachment?: IAttachment;
|
||||
reference?: IReference;
|
||||
conversationId?: string;
|
||||
prompt?: string;
|
||||
|
|
|
|||
|
|
@ -732,6 +732,33 @@ Example: general/v2/`,
|
|||
'Comma-separated emails whose “My Drive” contents should be indexed (include the primary admin).',
|
||||
google_driveSharedFoldersTip:
|
||||
'Comma-separated Google Drive folder links to crawl.',
|
||||
jiraDescription:
|
||||
'Connect your Jira workspace to sync issues, comments, and attachments.',
|
||||
jiraBaseUrlTip:
|
||||
'Base URL of your Jira site (e.g., https://your-domain.atlassian.net).',
|
||||
jiraProjectKeyTip:
|
||||
'Optional: limit syncing to a single project key (e.g., ENG).',
|
||||
jiraJqlTip:
|
||||
'Optional JQL filter. Leave blank to rely on project/time filters.',
|
||||
jiraBatchSizeTip:
|
||||
'Maximum number of issues requested from Jira per batch.',
|
||||
jiraCommentsTip:
|
||||
'Include Jira comments in the generated markdown document.',
|
||||
jiraAttachmentsTip:
|
||||
'Download attachments as separate documents during sync.',
|
||||
jiraAttachmentSizeTip:
|
||||
'Attachments larger than this number of bytes will be skipped.',
|
||||
jiraLabelsTip:
|
||||
'Labels that should be skipped while indexing (comma separated).',
|
||||
jiraBlacklistTip:
|
||||
'Comments whose author email matches these entries will be ignored.',
|
||||
jiraScopedTokenTip:
|
||||
'Enable this when using scoped Atlassian tokens (api.atlassian.com).',
|
||||
jiraEmailTip: 'Email associated with the Jira account/API token.',
|
||||
jiraTokenTip:
|
||||
'API token generated from https://id.atlassian.com/manage-profile/security/api-tokens.',
|
||||
jiraPasswordTip:
|
||||
'Optional password for Jira Server/Data Center environments.',
|
||||
availableSourcesDescription: 'Select a data source to add',
|
||||
availableSources: 'Available sources',
|
||||
datasourceDescription: 'Manage your data source and connections',
|
||||
|
|
@ -1009,6 +1036,8 @@ Example: general/v2/`,
|
|||
pleaseUploadAtLeastOneFile: 'Please upload at least one file',
|
||||
},
|
||||
flow: {
|
||||
downloadFileTypeTip: 'The file type to download',
|
||||
downloadFileType: 'Download file type',
|
||||
formatTypeError: 'Format or type error',
|
||||
variableNameMessage:
|
||||
'Variable name can only contain letters and underscores',
|
||||
|
|
@ -1591,6 +1620,8 @@ This delimiter is used to split the input text into several text pieces echo of
|
|||
codeDescription: 'It allows developers to write custom Python logic.',
|
||||
dataOperations: 'Data operations',
|
||||
dataOperationsDescription: 'Perform various operations on a Data object.',
|
||||
listOperations: 'List operations',
|
||||
listOperationsDescription: 'Perform operations on a list.',
|
||||
variableAssigner: 'Variable assigner',
|
||||
variableAssignerDescription:
|
||||
'This component performs operations on Data objects, including extracting, filtering, and editing keys and values in the Data.',
|
||||
|
|
@ -1806,6 +1837,19 @@ Important structured information may include: names, dates, locations, events, k
|
|||
removeKeys: 'Remove keys',
|
||||
renameKeys: 'Rename keys',
|
||||
},
|
||||
ListOperationsOptions: {
|
||||
topN: 'Top N',
|
||||
head: 'Head',
|
||||
tail: 'Tail',
|
||||
sort: 'Sort',
|
||||
filter: 'Filter',
|
||||
dropDuplicates: 'Drop duplicates',
|
||||
},
|
||||
sortMethod: 'Sort method',
|
||||
SortMethodOptions: {
|
||||
asc: 'Ascending',
|
||||
desc: 'Descending',
|
||||
},
|
||||
},
|
||||
llmTools: {
|
||||
bad_calculator: {
|
||||
|
|
|
|||
|
|
@ -716,6 +716,23 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
|||
'需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。',
|
||||
google_driveSharedFoldersTip:
|
||||
'需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。',
|
||||
jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。',
|
||||
jiraBaseUrlTip:
|
||||
'Jira 的 Base URL,例如:https://your-domain.atlassian.net。',
|
||||
jiraProjectKeyTip: '可选:仅同步指定的项目(如 RAG)。',
|
||||
jiraJqlTip: '可选:自定义 JQL 过滤条件,留空则使用项目 / 时间范围。',
|
||||
jiraBatchSizeTip: '每次向 Jira 请求的 Issue 数量上限。',
|
||||
jiraCommentsTip: '同步评论。',
|
||||
jiraAttachmentsTip: '开启后会将附件下载为独立文档。',
|
||||
jiraAttachmentSizeTip: '超过该字节阈值的附件会被跳过。',
|
||||
jiraLabelsTip: '需要跳过的标签(逗号分隔)。',
|
||||
jiraBlacklistTip: '这些邮箱作者的评论会被忽略。',
|
||||
jiraScopedTokenTip:
|
||||
'仅当凭证为 Atlassian scoped token(api.atlassian.com)时生效。',
|
||||
jiraEmailTip: '与 API Token 对应的 Jira 账户邮箱。',
|
||||
jiraTokenTip:
|
||||
'在 https://id.atlassian.com/manage-profile/security/api-tokens 生成的 API Token。(Cloud only)',
|
||||
jiraPasswordTip: '可选:仅 Jira Server/Data Center 环境需要的密码字段。',
|
||||
availableSourcesDescription: '选择要添加的数据源',
|
||||
availableSources: '可用数据源',
|
||||
datasourceDescription: '管理您的数据源和连接',
|
||||
|
|
@ -956,6 +973,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
|||
pleaseUploadAtLeastOneFile: '请上传至少一个文件',
|
||||
},
|
||||
flow: {
|
||||
downloadFileTypeTip: '文件下载的类型',
|
||||
downloadFileType: '文件类型',
|
||||
formatTypeError: '格式或类型错误',
|
||||
variableNameMessage: '名称只能包含字母和下划线',
|
||||
variableDescription: '变量的描述',
|
||||
|
|
@ -1508,6 +1527,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
|||
codeDescription: '它允许开发人员编写自定义 Python 逻辑。',
|
||||
dataOperations: '数据操作',
|
||||
dataOperationsDescription: '对数据对象执行各种操作。',
|
||||
listOperations: '列表操作',
|
||||
listOperationsDescription: '对列表对象执行各种操作。',
|
||||
variableAssigner: '变量赋值器',
|
||||
variableAssignerDescription:
|
||||
'此组件对数据对象执行操作,包括提取、筛选和编辑数据中的键和值。',
|
||||
|
|
@ -1679,6 +1700,19 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
|||
removeKeys: '删除键',
|
||||
renameKeys: '重命名键',
|
||||
},
|
||||
ListOperationsOptions: {
|
||||
topN: '取前N项',
|
||||
head: '取前第N项',
|
||||
tail: '取后第N项',
|
||||
sort: '排序',
|
||||
filter: '筛选',
|
||||
dropDuplicates: '去重',
|
||||
},
|
||||
sortMethod: '排序方式',
|
||||
SortMethodOptions: {
|
||||
asc: '升序',
|
||||
desc: '降序',
|
||||
},
|
||||
},
|
||||
footer: {
|
||||
profile: 'All rights reserved @ React',
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ import { FileNode } from './node/file-node';
|
|||
import { InvokeNode } from './node/invoke-node';
|
||||
import { IterationNode, IterationStartNode } from './node/iteration-node';
|
||||
import { KeywordNode } from './node/keyword-node';
|
||||
import { ListOperationsNode } from './node/list-operations-node';
|
||||
import { MessageNode } from './node/message-node';
|
||||
import NoteNode from './node/note-node';
|
||||
import ParserNode from './node/parser-node';
|
||||
|
|
@ -101,6 +102,7 @@ export const nodeTypes: NodeTypes = {
|
|||
splitterNode: SplitterNode,
|
||||
contextNode: ExtractorNode,
|
||||
dataOperationsNode: DataOperationsNode,
|
||||
listOperationsNode: ListOperationsNode,
|
||||
variableAssignerNode: VariableAssignerNode,
|
||||
variableAggregatorNode: VariableAggregatorNode,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ export function AccordionOperators({
|
|||
Operator.Code,
|
||||
Operator.StringTransform,
|
||||
Operator.DataOperations,
|
||||
Operator.ListOperations,
|
||||
// Operator.VariableAssigner,
|
||||
Operator.VariableAggregator,
|
||||
]}
|
||||
|
|
|
|||
22
web/src/pages/agent/canvas/node/list-operations-node.tsx
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
import { BaseNode } from '@/interfaces/database/agent';
|
||||
import { NodeProps } from '@xyflow/react';
|
||||
import { camelCase } from 'lodash';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { RagNode } from '.';
|
||||
import { ListOperationsFormSchemaType } from '../../form/list-operations-form';
|
||||
import { LabelCard } from './card';
|
||||
|
||||
export function ListOperationsNode({
|
||||
...props
|
||||
}: NodeProps<BaseNode<ListOperationsFormSchemaType>>) {
|
||||
const { data } = props;
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<RagNode {...props}>
|
||||
<LabelCard>
|
||||
{t(`flow.ListOperationsOptions.${camelCase(data.form?.operations)}`)}
|
||||
</LabelCard>
|
||||
</RagNode>
|
||||
);
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import {
|
|||
useSelectDerivedMessages,
|
||||
} from '@/hooks/logic-hooks';
|
||||
import {
|
||||
IAttachment,
|
||||
IEventList,
|
||||
IInputEvent,
|
||||
IMessageEndData,
|
||||
|
|
@ -75,9 +76,13 @@ export function findMessageFromList(eventList: IEventList) {
|
|||
nextContent += '</think>';
|
||||
}
|
||||
|
||||
const workflowFinished = eventList.find(
|
||||
(x) => x.event === MessageEventType.WorkflowFinished,
|
||||
) as IMessageEvent;
|
||||
return {
|
||||
id: eventList[0]?.message_id,
|
||||
content: nextContent,
|
||||
attachment: workflowFinished?.data?.outputs?.attachment || {},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -388,12 +393,13 @@ export const useSendAgentMessage = ({
|
|||
}, [sendMessageInTaskMode]);
|
||||
|
||||
useEffect(() => {
|
||||
const { content, id } = findMessageFromList(answerList);
|
||||
const { content, id, attachment } = findMessageFromList(answerList);
|
||||
const inputAnswer = findInputFromList(answerList);
|
||||
const answer = content || getLatestError(answerList);
|
||||
if (answerList.length > 0) {
|
||||
addNewestOneAnswer({
|
||||
answer: answer ?? '',
|
||||
attachment: attachment as IAttachment,
|
||||
id: id,
|
||||
...inputAnswer,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -417,6 +417,7 @@ export const initialIterationValues = {
|
|||
items_ref: '',
|
||||
outputs: {},
|
||||
};
|
||||
|
||||
export const initialIterationStartValues = {
|
||||
outputs: {
|
||||
item: {
|
||||
|
|
@ -595,6 +596,35 @@ export const initialDataOperationsValues = {
|
|||
},
|
||||
},
|
||||
};
|
||||
export enum SortMethod {
|
||||
Asc = 'asc',
|
||||
Desc = 'desc',
|
||||
}
|
||||
|
||||
export enum ListOperations {
|
||||
TopN = 'topN',
|
||||
Head = 'head',
|
||||
Tail = 'tail',
|
||||
Filter = 'filter',
|
||||
Sort = 'sort',
|
||||
DropDuplicates = 'drop_duplicates',
|
||||
}
|
||||
|
||||
export const initialListOperationsValues = {
|
||||
query: '',
|
||||
operations: ListOperations.TopN,
|
||||
outputs: {
|
||||
result: {
|
||||
type: 'Array<?>',
|
||||
},
|
||||
first: {
|
||||
type: '?',
|
||||
},
|
||||
last: {
|
||||
type: '?',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
export const initialVariableAssignerValues = {};
|
||||
|
||||
|
|
@ -673,6 +703,7 @@ export const RestrictedUpstreamMap = {
|
|||
[Operator.Tool]: [Operator.Begin],
|
||||
[Operator.Placeholder]: [Operator.Begin],
|
||||
[Operator.DataOperations]: [Operator.Begin],
|
||||
[Operator.ListOperations]: [Operator.Begin],
|
||||
[Operator.Parser]: [Operator.Begin], // pipeline
|
||||
[Operator.Splitter]: [Operator.Begin],
|
||||
[Operator.HierarchicalMerger]: [Operator.Begin],
|
||||
|
|
@ -729,6 +760,7 @@ export const NodeMap = {
|
|||
[Operator.HierarchicalMerger]: 'splitterNode',
|
||||
[Operator.Extractor]: 'contextNode',
|
||||
[Operator.DataOperations]: 'dataOperationsNode',
|
||||
[Operator.ListOperations]: 'listOperationsNode',
|
||||
[Operator.VariableAssigner]: 'variableAssignerNode',
|
||||
[Operator.VariableAggregator]: 'variableAggregatorNode',
|
||||
};
|
||||
|
|
@ -814,3 +846,10 @@ export enum JsonSchemaDataType {
|
|||
Array = 'array',
|
||||
Object = 'object',
|
||||
}
|
||||
|
||||
export enum ExportFileType {
|
||||
PDF = 'pdf',
|
||||
HTML = 'html',
|
||||
Markdown = 'md',
|
||||
DOCX = 'docx',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import IterationForm from '../form/iteration-form';
|
|||
import IterationStartForm from '../form/iteration-start-from';
|
||||
import Jin10Form from '../form/jin10-form';
|
||||
import KeywordExtractForm from '../form/keyword-extract-form';
|
||||
import ListOperationsForm from '../form/list-operations-form';
|
||||
import MessageForm from '../form/message-form';
|
||||
import ParserForm from '../form/parser-form';
|
||||
import PubMedForm from '../form/pubmed-form';
|
||||
|
|
@ -184,6 +185,9 @@ export const FormConfigMap = {
|
|||
[Operator.DataOperations]: {
|
||||
component: DataOperationsForm,
|
||||
},
|
||||
[Operator.ListOperations]: {
|
||||
component: ListOperationsForm,
|
||||
},
|
||||
[Operator.VariableAssigner]: {
|
||||
component: VariableAssignerForm,
|
||||
},
|
||||
|
|
|
|||
140
web/src/pages/agent/form/list-operations-form/index.tsx
Normal file
|
|
@@ -0,0 +1,140 @@
import NumberInput from '@/components/originui/number-input';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import {
  Form,
  FormControl,
  FormField,
  FormItem,
  FormLabel,
  FormMessage,
} from '@/components/ui/form';
import { Separator } from '@/components/ui/separator';
import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options';
import { buildOptions } from '@/utils/form';
import { zodResolver } from '@hookform/resolvers/zod';
import { memo } from 'react';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import {
  DataOperationsOperatorOptions,
  JsonSchemaDataType,
  ListOperations,
  SortMethod,
  initialListOperationsValues,
} from '../../constant';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
import { INextOperatorForm } from '../../interface';
import { buildOutputList } from '../../utils/build-output-list';
import { FormWrapper } from '../components/form-wrapper';
import { Output, OutputSchema } from '../components/output';
import { PromptEditor } from '../components/prompt-editor';
import { QueryVariable } from '../components/query-variable';

export const RetrievalPartialSchema = {
  query: z.string(),
  operations: z.string(),
  n: z.number().int().min(0).optional(),
  sort_method: z.string().optional(),
  filter: z
    .object({
      value: z.string().optional(),
      operator: z.string().optional(),
    })
    .optional(),
  ...OutputSchema,
};

export const FormSchema = z.object(RetrievalPartialSchema);

export type ListOperationsFormSchemaType = z.infer<typeof FormSchema>;

const outputList = buildOutputList(initialListOperationsValues.outputs);

function ListOperationsForm({ node }: INextOperatorForm) {
  const { t } = useTranslation();

  const defaultValues = useFormValues(initialListOperationsValues, node);

  const form = useForm<ListOperationsFormSchemaType>({
    defaultValues: defaultValues,
    mode: 'onChange',
    resolver: zodResolver(FormSchema),
    shouldUnregister: true,
  });

  const operations = useWatch({ control: form.control, name: 'operations' });

  const ListOperationsOptions = buildOptions(
    ListOperations,
    t,
    `flow.ListOperationsOptions`,
    true,
  );
  const SortMethodOptions = buildOptions(
    SortMethod,
    t,
    `flow.SortMethodOptions`,
    true,
  );
  const operatorOptions = useBuildSwitchOperatorOptions(
    DataOperationsOperatorOptions,
  );
  useWatchFormChange(node?.id, form, true);

  return (
    <Form {...form}>
      <FormWrapper>
        <QueryVariable
          name="query"
          className="flex-1"
          types={[JsonSchemaDataType.Array]}
        ></QueryVariable>
        <Separator />
        <RAGFlowFormItem name="operations" label={t('flow.operations')}>
          <SelectWithSearch options={ListOperationsOptions} />
        </RAGFlowFormItem>
        {[
          ListOperations.TopN,
          ListOperations.Head,
          ListOperations.Tail,
        ].includes(operations as ListOperations) && (
          <FormField
            control={form.control}
            name="n"
            render={({ field }) => (
              <FormItem>
                <FormLabel>{t('flowNum')}</FormLabel>
                <FormControl>
                  <NumberInput {...field} className="w-full"></NumberInput>
                </FormControl>
                <FormMessage />
              </FormItem>
            )}
          />
        )}
        {[ListOperations.Sort].includes(operations as ListOperations) && (
          <RAGFlowFormItem name="sort_method" label={t('flow.sortMethod')}>
            <SelectWithSearch options={SortMethodOptions} />
          </RAGFlowFormItem>
        )}
        {[ListOperations.Filter].includes(operations as ListOperations) && (
          <div className="flex items-center gap-2">
            <RAGFlowFormItem name="filter.operator" className="flex-1">
              <SelectWithSearch options={operatorOptions}></SelectWithSearch>
            </RAGFlowFormItem>
            <Separator className="w-2" />
            <RAGFlowFormItem name="filter.value" className="flex-1">
              <PromptEditor showToolbar={false} multiLine={false} />
            </RAGFlowFormItem>
          </div>
        )}
        <Output list={outputList} isFormRequired></Output>
      </FormWrapper>
    </Form>
  );
}

export default memo(ListOperationsForm);
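As a quick sanity check, the FormSchema above can be exercised directly with zod. A minimal sketch with made-up field values; the keys spread in from OutputSchema are omitted here, so the parse may flag them as missing:

// Illustrative only: run the schema against a hand-written candidate value.
const candidate = {
  query: 'some upstream array variable', // made-up reference
  operations: 'top_n',                   // assumed string value behind ListOperations.TopN
  n: 5,
};
const result = FormSchema.safeParse(candidate);
if (!result.success) {
  console.log(result.error.issues); // zod lists each failing field and why
}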
@@ -8,12 +8,14 @@ import {
  FormLabel,
  FormMessage,
} from '@/components/ui/form';
import { RAGFlowSelect } from '@/components/ui/select';
import { zodResolver } from '@hookform/resolvers/zod';
import { X } from 'lucide-react';
import { memo } from 'react';
import { useFieldArray, useForm } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { ExportFileType } from '../../constant';
import { INextOperatorForm } from '../../interface';
import { FormWrapper } from '../components/form-wrapper';
import { PromptEditor } from '../components/prompt-editor';
@@ -33,10 +35,14 @@ function MessageForm({ node }: INextOperatorForm) {
        }),
      )
      .optional(),
    output_format: z.string().optional(),
  });

  const form = useForm({
    defaultValues: values,
    defaultValues: {
      ...values,
      output_format: values.output_format,
    },
    resolver: zodResolver(FormSchema),
  });
@@ -50,6 +56,39 @@ function MessageForm({ node }: INextOperatorForm) {
  return (
    <Form {...form}>
      <FormWrapper>
        <FormContainer>
          <FormItem>
            <FormLabel tooltip={t('flow.downloadFileTypeTip')}>
              {t('flow.downloadFileType')}
            </FormLabel>
            <FormField
              control={form.control}
              name={`output_format`}
              render={({ field }) => (
                <FormItem className="flex-1">
                  <FormControl>
                    <RAGFlowSelect
                      options={Object.keys(ExportFileType).map(
                        (key: string) => {
                          return {
                            value:
                              ExportFileType[
                                key as keyof typeof ExportFileType
                              ],
                            label: key,
                          };
                        },
                      )}
                      {...field}
                      onValueChange={field.onChange}
                      placeholder={t('flow.messagePlaceholder')}
                    ></RAGFlowSelect>
                  </FormControl>
                </FormItem>
              )}
            />
          </FormItem>
        </FormContainer>
        <FormContainer>
          <FormItem>
            <FormLabel tooltip={t('flow.msgTip')}>{t('flow.msg')}</FormLabel>
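The Object.keys mapping above turns the ExportFileType enum into select options. A minimal sketch of the result for the members visible in this diff (Markdown = 'md', DOCX = 'docx', plus the PDF member referenced in the useValues hook below, whose string value is not shown here):

// Illustrative only: what the generated options look like.
const options = Object.keys(ExportFileType).map((key) => ({
  value: ExportFileType[key as keyof typeof ExportFileType],
  label: key,
}));
// e.g. [{ value: 'md', label: 'Markdown' }, { value: 'docx', label: 'DOCX' }, ...]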
@@ -1,7 +1,7 @@
import { RAGFlowNodeType } from '@/interfaces/database/flow';
import { isEmpty } from 'lodash';
import { useMemo } from 'react';
import { initialMessageValues } from '../../constant';
import { ExportFileType, initialMessageValues } from '../../constant';
import { convertToObjectArray } from '../../utils';

export function useValues(node?: RAGFlowNodeType) {
@@ -15,6 +15,7 @@ export function useValues(node?: RAGFlowNodeType) {
    return {
      ...formData,
      content: convertToObjectArray(formData.content),
      output_format: formData.output_format || ExportFileType.PDF,
    };
  }, [node]);
@@ -31,6 +31,7 @@ import {
  initialIterationValues,
  initialJin10Values,
  initialKeywordExtractValues,
  initialListOperationsValues,
  initialMessageValues,
  initialNoteValues,
  initialParserValues,
@@ -129,6 +130,7 @@ export const useInitializeOperatorParams = () => {
      prompts: t('flow.prompts.user.summary'),
    },
    [Operator.DataOperations]: initialDataOperationsValues,
    [Operator.ListOperations]: initialListOperationsValues,
    [Operator.VariableAssigner]: initialVariableAssignerValues,
    [Operator.VariableAggregator]: initialVariableAggregatorValues,
  };
@@ -14,7 +14,7 @@ import { ReactComponent as YahooFinanceIcon } from '@/assets/svg/yahoo-finance.s

import { IconFont } from '@/components/icon-font';
import { cn } from '@/lib/utils';
import { Equal, FileCode, HousePlus, Variable } from 'lucide-react';
import { Columns3, Equal, FileCode, HousePlus, Variable } from 'lucide-react';
import { Operator } from './constant';

interface IProps {
@@ -57,6 +57,7 @@ export const SVGIconMap = {
};
export const LucideIconMap = {
  [Operator.DataOperations]: FileCode,
  [Operator.ListOperations]: Columns3,
  [Operator.VariableAssigner]: Equal,
  [Operator.VariableAggregator]: Variable,
};
@@ -328,7 +328,6 @@ export const buildDslComponentsByGraph = (
      case Operator.DataOperations:
        params = transformDataOperationsParams(params);
        break;

      default:
        break;
    }
@@ -9,8 +9,8 @@ export enum DataSourceKey {
  NOTION = 'notion',
  DISCORD = 'discord',
  GOOGLE_DRIVE = 'google_drive',
  // GMAIL = 'gmail',
  // JIRA = 'jira',
  // GMAIL = 'gmail',
  JIRA = 'jira',
  // SHAREPOINT = 'sharepoint',
  // SLACK = 'slack',
  // TEAMS = 'teams',
@@ -42,6 +42,11 @@ export const DataSourceInfo = {
    description: t(`setting.${DataSourceKey.GOOGLE_DRIVE}Description`),
    icon: <SvgIcon name={'data-source/google-drive'} width={38} />,
  },
  [DataSourceKey.JIRA]: {
    name: 'Jira',
    description: t(`setting.${DataSourceKey.JIRA}Description`),
    icon: <SvgIcon name={'data-source/jira'} width={38} />,
  },
};

export const DataSourceFormBaseFields = [
@@ -270,6 +275,106 @@ export const DataSourceFormFields = {
      defaultValue: 'uploaded',
    },
  ],
  [DataSourceKey.JIRA]: [
    {
      label: 'Jira Base URL',
      name: 'config.base_url',
      type: FormFieldType.Text,
      required: true,
      placeholder: 'https://your-domain.atlassian.net',
      tooltip: t('setting.jiraBaseUrlTip'),
    },
    {
      label: 'Project Key',
      name: 'config.project_key',
      type: FormFieldType.Text,
      required: false,
      placeholder: 'RAGFlow',
      tooltip: t('setting.jiraProjectKeyTip'),
    },
    {
      label: 'Custom JQL',
      name: 'config.jql_query',
      type: FormFieldType.Textarea,
      required: false,
      placeholder: 'project = RAG AND updated >= -7d',
      tooltip: t('setting.jiraJqlTip'),
    },
    {
      label: 'Batch Size',
      name: 'config.batch_size',
      type: FormFieldType.Number,
      required: false,
      tooltip: t('setting.jiraBatchSizeTip'),
    },
    {
      label: 'Include Comments',
      name: 'config.include_comments',
      type: FormFieldType.Checkbox,
      required: false,
      defaultValue: true,
      tooltip: t('setting.jiraCommentsTip'),
    },
    {
      label: 'Include Attachments',
      name: 'config.include_attachments',
      type: FormFieldType.Checkbox,
      required: false,
      defaultValue: false,
      tooltip: t('setting.jiraAttachmentsTip'),
    },
    {
      label: 'Attachment Size Limit (bytes)',
      name: 'config.attachment_size_limit',
      type: FormFieldType.Number,
      required: false,
      defaultValue: 10 * 1024 * 1024,
      tooltip: t('setting.jiraAttachmentSizeTip'),
    },
    {
      label: 'Labels to Skip',
      name: 'config.labels_to_skip',
      type: FormFieldType.Tag,
      required: false,
      tooltip: t('setting.jiraLabelsTip'),
    },
    {
      label: 'Comment Email Blacklist',
      name: 'config.comment_email_blacklist',
      type: FormFieldType.Tag,
      required: false,
      tooltip: t('setting.jiraBlacklistTip'),
    },
    {
      label: 'Use Scoped Token (Cloud only)',
      name: 'config.scoped_token',
      type: FormFieldType.Checkbox,
      required: false,
      tooltip: t('setting.jiraScopedTokenTip'),
    },
    {
      label: 'Jira User Email (Cloud) or User Name (Server)',
      name: 'config.credentials.jira_user_email',
      type: FormFieldType.Text,
      required: true,
      placeholder: 'you@example.com',
      tooltip: t('setting.jiraEmailTip'),
    },
    {
      label: 'Jira API Token (Cloud only)',
      name: 'config.credentials.jira_api_token',
      type: FormFieldType.Password,
      required: false,
      tooltip: t('setting.jiraTokenTip'),
    },
    {
      label: 'Jira Password (Server only)',
      name: 'config.credentials.jira_password',
      type: FormFieldType.Password,
      required: false,
      tooltip: t('setting.jiraPasswordTip'),
    },
  ],
  // [DataSourceKey.GOOGLE_DRIVE]: [
  //   {
  //     label: 'Primary Admin Email',
@@ -433,4 +538,25 @@ export const DataSourceFormDefaultValues = {
      },
    },
  },
  [DataSourceKey.JIRA]: {
    name: '',
    source: DataSourceKey.JIRA,
    config: {
      base_url: '',
      project_key: '',
      jql_query: '',
      batch_size: 2,
      include_comments: true,
      include_attachments: false,
      attachment_size_limit: 10 * 1024 * 1024,
      labels_to_skip: [],
      comment_email_blacklist: [],
      scoped_token: false,
      credentials: {
        jira_user_email: '',
        jira_api_token: '',
        jira_password: '',
      },
    },
  },
};
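The Jira form fields above use react-hook-form dot paths (for example config.credentials.jira_user_email), so the values a user enters are assembled into the same nested shape as these defaults. A minimal illustration with made-up values:

// Illustrative only: what a filled-in Jira connector config could look like.
const jiraConnector = {
  name: 'My Jira connector',
  source: DataSourceKey.JIRA,
  config: {
    base_url: 'https://your-domain.atlassian.net',
    project_key: 'RAGFlow',
    include_comments: true,
    credentials: {
      jira_user_email: 'you@example.com',
      jira_api_token: '***',
    },
  },
};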
@@ -44,6 +44,12 @@ const dataSourceTemplates = [
    description: DataSourceInfo[DataSourceKey.NOTION].description,
    icon: DataSourceInfo[DataSourceKey.NOTION].icon,
  },
  {
    id: DataSourceKey.JIRA,
    name: DataSourceInfo[DataSourceKey.JIRA].name,
    description: DataSourceInfo[DataSourceKey.JIRA].description,
    icon: DataSourceInfo[DataSourceKey.JIRA].icon,
  },
];
const DataSource = () => {
  const { t } = useTranslation();
@@ -13,6 +13,7 @@ const {
  get_document_file,
  getFile,
  moveFile,
  get_document_file_download,
} = api;

const methods = {
@@ -65,4 +66,10 @@ const fileManagerService = registerServer<keyof typeof methods>(
  request,
);

export const downloadFile = (data: { docId: string; ext: string }) => {
  return request.get(get_document_file_download(data.docId), {
    params: { ext: data.ext },
    responseType: 'blob',
  });
};
export default fileManagerService;
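A minimal sketch of how a caller could wire downloadFile to a browser download. This is not part of the commit, and it assumes request is an axios-style client so the blob is available on response.data:

// Hypothetical caller: fetch the exported document and save it locally.
async function saveExport(docId: string, ext: string) {
  const response = await downloadFile({ docId, ext });
  const url = URL.createObjectURL(response.data); // blob thanks to responseType: 'blob'
  const link = document.createElement('a');
  link.href = url;
  link.download = `${docId}.${ext}`; // made-up file name
  link.click();
  URL.revokeObjectURL(url);
}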
@@ -100,6 +100,8 @@ export default {
  document_change_parser: `${api_host}/document/change_parser`,
  document_thumbnails: `${api_host}/document/thumbnails`,
  get_document_file: `${api_host}/document/get`,
  get_document_file_download: (docId: string) =>
    `${api_host}/document/download/${docId}`,
  document_upload: `${api_host}/document/upload`,
  web_crawl: `${api_host}/document/web_crawl`,
  document_infos: `${api_host}/document/infos`,