From 87e69868c0b1c817f2fd51f30a2737e6020a728b Mon Sep 17 00:00:00 2001
From: chanx <1243304602@qq.com>
Date: Fri, 14 Nov 2025 13:56:56 +0800
Subject: [PATCH 01/15] Fixes: Added session variable types and modified
 configuration  (#11269)

### What problem does this PR solve?

Fixes: Added session variable types and modified configuration

- Added more session variable types (`object` and typed arrays)
- Changed the embedding model switching logic in the knowledge base
configuration

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 web/src/components/dynamic-form.tsx           | 152 ++++++++---
 web/src/components/ui/segmented.tsx           |   5 +-
 web/src/locales/en.ts                         |   1 +
 web/src/locales/zh.ts                         |   1 +
 .../component/add-variable-modal.tsx          | 134 ++++++++++
 .../{contant.ts => constant.ts}               |  26 +-
 .../gobal-variable-sheet/hooks/use-form.tsx   |  41 +++
 .../hooks/use-object-fields.tsx               | 246 ++++++++++++++++++
 .../agent/gobal-variable-sheet/index.tsx      | 188 ++++---------
 web/src/pages/agent/hooks/use-build-dsl.ts    |  10 +-
 web/src/pages/agent/hooks/use-save-graph.ts   |   2 +-
 web/src/pages/agent/index.tsx                 |  18 +-
 web/src/pages/agent/utils.ts                  |  24 +-
 .../configuration/common-item.tsx             |  46 +++-
 .../dataset/dataset-setting/general-form.tsx  |   2 +-
 .../pages/dataset/dataset-setting/hooks.ts    |  21 ++
 web/src/services/knowledge-service.ts         |   6 +
 web/src/utils/api.ts                          |   2 +
 18 files changed, 712 insertions(+), 213 deletions(-)
 create mode 100644 web/src/pages/agent/gobal-variable-sheet/component/add-variable-modal.tsx
 rename web/src/pages/agent/gobal-variable-sheet/{contant.ts => constant.ts} (72%)
 create mode 100644 web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx
 create mode 100644 web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx
diff --git a/web/src/components/dynamic-form.tsx b/web/src/components/dynamic-form.tsx
index f7449ec9f..a90afe287 100644
--- a/web/src/components/dynamic-form.tsx
+++ b/web/src/components/dynamic-form.tsx
@@ -61,6 +61,12 @@ export interface FormFieldConfig {
   horizontal?: boolean;
   onChange?: (value: any) => void;
   tooltip?: React.ReactNode;
+  customValidate?: (
+    value: any,
+    formValues: any,
+  ) => string | boolean | Promise<string | boolean>;
+  dependencies?: string[];
+  schema?: ZodSchema;
 }
 
 // Component props interface
@@ -94,36 +100,40 @@ const generateSchema = (fields: FormFieldConfig[]): ZodSchema<any> => {
     let fieldSchema: ZodSchema;
 
     // Create base validation schema based on field type
-    switch (field.type) {
-      case FormFieldType.Email:
-        fieldSchema = z.string().email('Please enter a valid email address');
-        break;
-      case FormFieldType.Number:
-        fieldSchema = z.coerce.number();
-        if (field.validation?.min !== undefined) {
-          fieldSchema = (fieldSchema as z.ZodNumber).min(
-            field.validation.min,
-            field.validation.message ||
-              `Value cannot be less than ${field.validation.min}`,
-          );
-        }
-        if (field.validation?.max !== undefined) {
-          fieldSchema = (fieldSchema as z.ZodNumber).max(
-            field.validation.max,
-            field.validation.message ||
-              `Value cannot be greater than ${field.validation.max}`,
-          );
-        }
-        break;
-      case FormFieldType.Checkbox:
-        fieldSchema = z.boolean();
-        break;
-      case FormFieldType.Tag:
-        fieldSchema = z.array(z.string());
-        break;
-      default:
-        fieldSchema = z.string();
-        break;
+    if (field.schema) {
+      fieldSchema = field.schema;
+    } else {
+      switch (field.type) {
+        case FormFieldType.Email:
+          fieldSchema = z.string().email('Please enter a valid email address');
+          break;
+        case FormFieldType.Number:
+          fieldSchema = z.coerce.number();
+          if (field.validation?.min !== undefined) {
+            fieldSchema = (fieldSchema as z.ZodNumber).min(
+              field.validation.min,
+              field.validation.message ||
+                `Value cannot be less than ${field.validation.min}`,
+            );
+          }
+          if (field.validation?.max !== undefined) {
+            fieldSchema = (fieldSchema as z.ZodNumber).max(
+              field.validation.max,
+              field.validation.message ||
+                `Value cannot be greater than ${field.validation.max}`,
+            );
+          }
+          break;
+        case FormFieldType.Checkbox:
+          fieldSchema = z.boolean();
+          break;
+        case FormFieldType.Tag:
+          fieldSchema = z.array(z.string());
+          break;
+        default:
+          fieldSchema = z.string();
+          break;
+      }
     }
 
     // Handle required fields
@@ -300,10 +310,90 @@ const DynamicForm = {
 
       // Initialize form
       const form = useForm<T>({
-        resolver: zodResolver(schema),
+        resolver: async (data, context, options) => {
+          const zodResult = await zodResolver(schema)(data, context, options);
+
+          let combinedErrors = { ...zodResult.errors };
+          // customValidate sees the raw input; its errors override zod's.
+          const fieldErrors: Record<string, { type: string; message: string }> =
+            {};
+          for (const field of fields) {
+            if (field.customValidate && data[field.name] !== undefined) {
+              try {
+                const result = await field.customValidate(
+                  data[field.name],
+                  data,
+                );
+                if (typeof result === 'string') {
+                  fieldErrors[field.name] = {
+                    type: 'custom',
+                    message: result,
+                  };
+                } else if (result === false) {
+                  fieldErrors[field.name] = {
+                    type: 'custom',
+                    message:
+                      field.validation?.message || `${field.label} is invalid`,
+                  };
+                }
+              } catch (error) {
+                fieldErrors[field.name] = {
+                  type: 'custom',
+                  message:
+                    error instanceof Error
+                      ? error.message
+                      : 'Validation failed',
+                };
+              }
+            }
+          }
+
+          combinedErrors = {
+            ...combinedErrors,
+            ...fieldErrors,
+          } as any;
+          // Return zod's parsed values so coercions (z.coerce.number) apply.
+          return {
+            values: Object.keys(combinedErrors).length ? {} : zodResult.values,
+            errors: combinedErrors,
+          } as any;
+        },
         defaultValues,
       });
 
+      useEffect(() => {
+        const dependencyMap: Record<string, string[]> = {};
+
+        fields.forEach((field) => {
+          if (field.dependencies && field.dependencies.length > 0) {
+            field.dependencies.forEach((dep) => {
+              if (!dependencyMap[dep]) {
+                dependencyMap[dep] = [];
+              }
+              dependencyMap[dep].push(field.name);
+            });
+          }
+        });
+
+        const subscriptions = Object.keys(dependencyMap).map((depField) => {
+          return form.watch((values: any, { name }) => {
+            if (name === depField && dependencyMap[depField]) {
+              dependencyMap[depField].forEach((dependentField) => {
+                form.trigger(dependentField as any);
+              });
+            }
+          });
+        });
+
+        return () => {
+          subscriptions.forEach((sub) => {
+            if (sub.unsubscribe) {
+              sub.unsubscribe();
+            }
+          });
+        };
+      }, [fields, form]);
+
       // Expose form methods via ref
       useImperativeHandle(ref, () => ({
         submit: () => form.handleSubmit(onSubmit)(),
diff --git a/web/src/components/ui/segmented.tsx b/web/src/components/ui/segmented.tsx
index 8aadc3b21..3f9b0cc53 100644
--- a/web/src/components/ui/segmented.tsx
+++ b/web/src/components/ui/segmented.tsx
@@ -51,6 +51,7 @@ export interface SegmentedProps
   direction?: 'ltr' | 'rtl';
   motionName?: string;
   activeClassName?: string;
+  itemClassName?: string;
   rounded?: keyof typeof segmentedVariants.round;
   sizeType?: keyof typeof segmentedVariants.size;
   buttonSize?: keyof typeof segmentedVariants.buttonSize;
@@ -62,6 +63,7 @@ export function Segmented({
   onChange,
   className,
   activeClassName,
+  itemClassName,
   rounded = 'default',
   sizeType = 'default',
   buttonSize = 'default',
@@ -92,12 +94,13 @@ export function Segmented({
           <div
             key={actualValue}
             className={cn(
-              'inline-flex items-center  text-base font-normal cursor-pointer',
+              'inline-flex items-center text-base font-normal cursor-pointer',
               segmentedVariants.round[rounded],
               segmentedVariants.buttonSize[buttonSize],
               {
                 'text-text-primary bg-bg-base': selectedValue === actualValue,
               },
+              itemClassName,
               activeClassName && selectedValue === actualValue
                 ? activeClassName
                 : '',
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 915508692..9a0569ab5 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -1009,6 +1009,7 @@ Example: general/v2/`,
       pleaseUploadAtLeastOneFile: 'Please upload at least one file',
     },
     flow: {
+      formatTypeError: 'Format or type error',
       variableNameMessage:
         'Variable name can only contain letters and underscores',
       variableDescription: 'Variable Description',
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index a5f4a9d52..c065986f2 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -956,6 +956,7 @@ General：实体和关系提取提示来自 GitHub - microsoft/graphrag：基于
       pleaseUploadAtLeastOneFile: '请上传至少一个文件',
     },
     flow: {
+      formatTypeError: '格式或类型错误',
       variableNameMessage: '名称只能包含字母和下划线',
       variableDescription: '变量的描述',
       defaultValue: '默认值',
diff --git a/web/src/pages/agent/gobal-variable-sheet/component/add-variable-modal.tsx b/web/src/pages/agent/gobal-variable-sheet/component/add-variable-modal.tsx
new file mode 100644
index 000000000..8ba82f526
--- /dev/null
+++ b/web/src/pages/agent/gobal-variable-sheet/component/add-variable-modal.tsx
@@ -0,0 +1,134 @@
+import {
+  DynamicForm,
+  DynamicFormRef,
+  FormFieldConfig,
+} from '@/components/dynamic-form';
+import { Modal } from '@/components/ui/modal/modal';
+import { t } from 'i18next';
+import { useEffect, useRef } from 'react';
+import { FieldValues } from 'react-hook-form';
+import { TypeMaps, TypesWithArray } from '../constant';
+import { useHandleForm } from '../hooks/use-form';
+import { useObjectFields } from '../hooks/use-object-fields';
+
+/**
+ * Modal hosting the DynamicForm for a conversation variable.
+ * Picking a `type` swaps the `value` field's editor, validator and schema.
+ */
+export const AddVariableModal = (props: {
+  fields?: FormFieldConfig[];
+  setFields: (value: any) => void;
+  visible?: boolean;
+  hideModal: () => void;
+  defaultValues?: FieldValues;
+  setDefaultValues?: (value: FieldValues) => void;
+}) => {
+  const {
+    fields,
+    setFields,
+    visible,
+    hideModal,
+    defaultValues,
+    setDefaultValues,
+  } = props;
+
+  const { handleSubmit: submitForm, loading } = useHandleForm();
+
+  const { handleCustomValidate, handleCustomSchema, handleRender } =
+    useObjectFields();
+
+  const formRef = useRef<DynamicFormRef>(null);
+
+  const handleFieldUpdate = (
+    fieldName: string,
+    updatedField: Partial<FormFieldConfig>,
+  ) => {
+    setFields((prevFields: any) =>
+      prevFields.map((field: any) =>
+        field.name === fieldName ? { ...field, ...updatedField } : field,
+      ),
+    );
+  };
+
+  useEffect(() => {
+    const typeField = fields?.find((item) => item.name === 'type');
+
+    if (typeField) {
+      typeField.onChange = (value) => {
+        handleFieldUpdate('value', {
+          type: TypeMaps[value as keyof typeof TypeMaps],
+          render: handleRender(value),
+          customValidate: handleCustomValidate(value),
+          schema: handleCustomSchema(value),
+        });
+        const values = formRef.current?.getValues();
+        // Reset `value` to an empty default that fits the newly selected type.
+        switch (value) {
+          case TypesWithArray.Boolean:
+            setDefaultValues?.({ ...values, value: false });
+            break;
+          case TypesWithArray.Number:
+            setDefaultValues?.({ ...values, value: 0 });
+            break;
+          case TypesWithArray.Object:
+            setDefaultValues?.({ ...values, value: {} });
+            break;
+          case TypesWithArray.ArrayString:
+            setDefaultValues?.({ ...values, value: [''] });
+            break;
+          case TypesWithArray.ArrayNumber:
+            setDefaultValues?.({ ...values, value: [0] });
+            break;
+          case TypesWithArray.ArrayBoolean:
+            setDefaultValues?.({ ...values, value: [false] });
+            break;
+          case TypesWithArray.ArrayObject:
+            setDefaultValues?.({ ...values, value: [] });
+            break;
+          default:
+            setDefaultValues?.({ ...values, value: '' });
+            break;
+        }
+      };
+    }
+  }, [fields]);
+
+  const handleSubmit = async (fieldValue: FieldValues) => {
+    await submitForm(fieldValue);
+    hideModal();
+  };
+
+  return (
+    <Modal
+      title={t('flow.add') + t('flow.conversationVariable')}
+      open={visible || false}
+      onCancel={hideModal}
+      showfooter={false}
+    >
+      <DynamicForm.Root
+        ref={formRef}
+        fields={fields || []}
+        onSubmit={() => {
+          // No-op: saving is driven by SavingButton's submitFunc below.
+        }}
+        defaultValues={defaultValues}
+        onFieldUpdate={handleFieldUpdate}
+      >
+        <div className="flex items-center justify-end w-full gap-2">
+          <DynamicForm.CancelButton
+            handleCancel={() => {
+              hideModal?.();
+            }}
+          />
+          <DynamicForm.SavingButton
+            submitLoading={loading || false}
+            buttonText={t('common.ok')}
+            submitFunc={(values: FieldValues) => {
+              handleSubmit(values);
+            }}
+          />
+        </div>
+      </DynamicForm.Root>
+    </Modal>
+  );
+};
diff --git a/web/src/pages/agent/gobal-variable-sheet/contant.ts b/web/src/pages/agent/gobal-variable-sheet/constant.ts
similarity index 72%
rename from web/src/pages/agent/gobal-variable-sheet/contant.ts
rename to web/src/pages/agent/gobal-variable-sheet/constant.ts
index 2f3bd395f..fc668e330 100644
--- a/web/src/pages/agent/gobal-variable-sheet/contant.ts
+++ b/web/src/pages/agent/gobal-variable-sheet/constant.ts
@@ -13,14 +13,14 @@ export enum TypesWithArray {
   String = 'string',
   Number = 'number',
   Boolean = 'boolean',
-  // Object = 'object',
-  // ArrayString = 'array<string>',
-  // ArrayNumber = 'array<number>',
-  // ArrayBoolean = 'array<boolean>',
-  // ArrayObject = 'array<object>',
+  Object = 'object',
+  ArrayString = 'array<string>',
+  ArrayNumber = 'array<number>',
+  ArrayBoolean = 'array<boolean>',
+  ArrayObject = 'array<object>',
 }
 
-export const GobalFormFields = [
+export const GlobalFormFields = [
   {
     label: t('flow.name'),
     name: 'name',
@@ -50,11 +50,11 @@ export const GobalFormFields = [
     label: t('flow.description'),
     name: 'description',
     placeholder: t('flow.variableDescription'),
-    type: 'textarea',
+    type: FormFieldType.Textarea,
   },
 ] as FormFieldConfig[];
 
-export const GobalVariableFormDefaultValues = {
+export const GlobalVariableFormDefaultValues = {
   name: '',
   type: TypesWithArray.String,
   value: '',
@@ -65,9 +65,9 @@ export const TypeMaps = {
   [TypesWithArray.String]: FormFieldType.Textarea,
   [TypesWithArray.Number]: FormFieldType.Number,
   [TypesWithArray.Boolean]: FormFieldType.Checkbox,
-  // [TypesWithArray.Object]: FormFieldType.Textarea,
-  // [TypesWithArray.ArrayString]: FormFieldType.Textarea,
-  // [TypesWithArray.ArrayNumber]: FormFieldType.Textarea,
-  // [TypesWithArray.ArrayBoolean]: FormFieldType.Textarea,
-  // [TypesWithArray.ArrayObject]: FormFieldType.Textarea,
+  [TypesWithArray.Object]: FormFieldType.Textarea,
+  [TypesWithArray.ArrayString]: FormFieldType.Textarea,
+  [TypesWithArray.ArrayNumber]: FormFieldType.Textarea,
+  [TypesWithArray.ArrayBoolean]: FormFieldType.Textarea,
+  [TypesWithArray.ArrayObject]: FormFieldType.Textarea,
 };
diff --git a/web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx b/web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx
new file mode 100644
index 000000000..cb38012f3
--- /dev/null
+++ b/web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx
@@ -0,0 +1,41 @@
+import { useFetchAgent } from '@/hooks/use-agent-request';
+import { GlobalVariableType } from '@/interfaces/database/agent';
+import { useCallback } from 'react';
+import { FieldValues } from 'react-hook-form';
+import { useSaveGraph } from '../../hooks/use-save-graph';
+import { TypesWithArray } from '../constant';
+
+// Parse a JSON string; anything that cannot be parsed is returned as-is.
+const handleObjectData = (value: any) => {
+  try {
+    return JSON.parse(value);
+  } catch (error) {
+    return value;
+  }
+};
+export const useHandleForm = () => {
+  const { data, refetch } = useFetchAgent();
+  const { saveGraph, loading } = useSaveGraph();
+  const handleSubmit = useCallback(
+    async (fieldValue: FieldValues) => {
+      const param = {
+        ...(data.dsl?.variables || {}),
+        [fieldValue.name]: {
+          ...fieldValue,
+          value:
+            fieldValue.type === TypesWithArray.Object ||
+            fieldValue.type === TypesWithArray.ArrayObject
+              ? handleObjectData(fieldValue.value)
+              : fieldValue.value,
+        },
+      } as Record<string, GlobalVariableType>;
+      const res = await saveGraph(undefined, { globalVariables: param });
+      if (res.code === 0) {
+        refetch();
+      }
+    },
+    // Track live data so a save never drops variables added since mount.
+    [data, saveGraph, refetch],
+  );
+  return { handleSubmit, loading };
+};
diff --git a/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx
new file mode 100644
index 000000000..d8600d568
--- /dev/null
+++ b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx
@@ -0,0 +1,246 @@
+import { BlockButton, Button } from '@/components/ui/button';
+import { Input } from '@/components/ui/input';
+import { Segmented } from '@/components/ui/segmented';
+import { Editor } from '@monaco-editor/react';
+import { t } from 'i18next';
+import { Trash2, X } from 'lucide-react';
+import { useCallback } from 'react';
+import { FieldValues } from 'react-hook-form';
+import { z } from 'zod';
+import { TypesWithArray } from '../constant';
+
+export const useObjectFields = () => {
+  const booleanRender = useCallback(
+    (field: FieldValues, className?: string) => {
+      const fieldValue = field.value ? true : false;
+      return (
+        <Segmented
+          options={
+            [
+              { value: true, label: 'True' },
+              { value: false, label: 'False' },
+            ] as any
+          }
+          sizeType="sm"
+          value={fieldValue}
+          onChange={field.onChange}
+          className={className}
+          itemClassName="justify-center flex-1"
+        ></Segmented>
+      );
+    },
+    [],
+  );
+
+  const objectRender = useCallback((field: FieldValues) => {
+    // The editor reports edits as strings: echo them back unchanged, otherwise
+    // `value` would fall back to `{}` as soon as the user types.
+    const isObject = field.value !== null && typeof field.value === 'object';
+    const text = isObject ? JSON.stringify(field.value, null, 2) : '{}';
+    const fieldValue = typeof field.value === 'string' ? field.value : text;
+    return (
+      <Editor
+        height={200}
+        defaultLanguage="json"
+        theme="vs-dark"
+        value={fieldValue}
+        onChange={field.onChange}
+      />
+    );
+  }, []);
+
+  const objectValidate = useCallback((value: any) => {
+    try {
+      // Accept JSON text (editor output) or an already-parsed object/array.
+      const parsed = typeof value === 'string' ? JSON.parse(value) : value;
+      if (parsed && typeof parsed === 'object') return true;
+    } catch (e) {
+      // Malformed JSON: report it with the same message as a wrong type.
+    }
+    throw new Error(t('flow.formatTypeError'));
+  }, []);
+
+  const arrayStringRender = useCallback((field: FieldValues, type = 'text') => {
+    // Number inputs must yield numbers or array<number> can never validate.
+    const blank = type === 'number' ? 0 : '';
+    const values = Array.isArray(field.value) ? field.value : [blank];
+    const cast = (raw: string) => (type === 'number' ? Number(raw) : raw);
+    return (
+      <>
+        {values?.map((item: any, index: number) => (
+          <div key={index} className="flex gap-1 items-center">
+            <Input
+              type={type}
+              value={item}
+              onChange={(e) => {
+                const newValues = [...values];
+                newValues[index] = cast(e.target.value);
+                field.onChange(newValues);
+              }}
+            />
+            <Button
+              type="button"
+              variant={'secondary'}
+              onClick={() => {
+                const newValues = [...values];
+                newValues.splice(index, 1);
+                field.onChange(newValues);
+              }}
+            >
+              <Trash2 />
+            </Button>
+          </div>
+        ))}
+        <BlockButton
+          type="button"
+          onClick={() => field.onChange([...values, blank])}
+        >
+          {t('flow.add')}
+        </BlockButton>
+      </>
+    );
+  }, []);
+
+  const arrayBooleanRender = useCallback(
+    (field: FieldValues) => {
+      // Emit fresh arrays on change; never mutate the form's value in place.
+      const values = Array.isArray(field.value) ? field.value : [false];
+      return (
+        <div className="flex items-center gap-1 flex-wrap ">
+          {values?.map((item: any, index: number) => (
+            <div
+              key={index}
+              className="flex gap-1 items-center bg-bg-card rounded-lg border-[0.5px] border-border-button"
+            >
+              {booleanRender(
+                {
+                  value: item,
+                  onChange: (value) => {
+                    const newValues = [...values];
+                    newValues[index] = !!value;
+                    field.onChange(newValues);
+                  },
+                },
+                'bg-transparent',
+              )}
+              <Button
+                type="button"
+                variant={'transparent'}
+                className="border-none py-0 px-1"
+                onClick={() => {
+                  const newValues = [...values];
+                  newValues.splice(index, 1);
+                  field.onChange(newValues);
+                }}
+              >
+                <X />
+              </Button>
+            </div>
+          ))}
+          <BlockButton
+            className="w-auto"
+            type="button"
+            onClick={() => field.onChange([...values, false])}
+          >
+            {t('flow.add')}
+          </BlockButton>
+        </div>
+      );
+    },
+    [booleanRender],
+  );
+
+  const arrayNumberRender = useCallback(
+    (field: FieldValues) => {
+      return arrayStringRender(field, 'number');
+    },
+    [arrayStringRender],
+  );
+
+  const arrayValidate = useCallback((value: any, type: string = 'string') => {
+    if (!Array.isArray(value) || !value.every((item) => typeof item === type)) {
+      throw new Error(t('flow.formatTypeError'));
+    }
+    return true;
+  }, []);
+
+  const arrayStringValidate = useCallback(
+    (value: any) => {
+      return arrayValidate(value, 'string');
+    },
+    [arrayValidate],
+  );
+
+  const arrayNumberValidate = useCallback(
+    (value: any) => {
+      return arrayValidate(value, 'number');
+    },
+    [arrayValidate],
+  );
+
+  const arrayBooleanValidate = useCallback(
+    (value: any) => {
+      return arrayValidate(value, 'boolean');
+    },
+    [arrayValidate],
+  );
+
+  const handleRender = (value: TypesWithArray) => {
+    switch (value) {
+      case TypesWithArray.Boolean:
+        return booleanRender;
+      case TypesWithArray.Object:
+      case TypesWithArray.ArrayObject:
+        return objectRender;
+      case TypesWithArray.ArrayString:
+        return arrayStringRender;
+      case TypesWithArray.ArrayNumber:
+        return arrayNumberRender;
+      case TypesWithArray.ArrayBoolean:
+        return arrayBooleanRender;
+      default:
+        return undefined;
+    }
+  };
+  const handleCustomValidate = (value: TypesWithArray) => {
+    switch (value) {
+      case TypesWithArray.Object:
+      case TypesWithArray.ArrayObject:
+        return objectValidate;
+      case TypesWithArray.ArrayString:
+        return arrayStringValidate;
+      case TypesWithArray.ArrayNumber:
+        return arrayNumberValidate;
+      case TypesWithArray.ArrayBoolean:
+        return arrayBooleanValidate;
+      default:
+        return undefined;
+    }
+  };
+  const handleCustomSchema = (value: TypesWithArray) => {
+    switch (value) {
+      case TypesWithArray.ArrayString:
+        return z.array(z.string());
+      case TypesWithArray.ArrayNumber:
+        return z.array(z.number());
+      case TypesWithArray.ArrayBoolean:
+        return z.array(z.boolean());
+      default:
+        return undefined;
+    }
+  };
+  return {
+    objectRender,
+    objectValidate,
+    arrayStringRender,
+    arrayStringValidate,
+    arrayNumberRender,
+    booleanRender,
+    arrayBooleanRender,
+    arrayNumberValidate,
+    arrayBooleanValidate,
+    handleRender,
+    handleCustomValidate,
+    handleCustomSchema,
+  };
+};
diff --git a/web/src/pages/agent/gobal-variable-sheet/index.tsx b/web/src/pages/agent/gobal-variable-sheet/index.tsx
index 454131638..51648b8d1 100644
--- a/web/src/pages/agent/gobal-variable-sheet/index.tsx
+++ b/web/src/pages/agent/gobal-variable-sheet/index.tsx
@@ -1,12 +1,6 @@
 import { ConfirmDeleteDialog } from '@/components/confirm-delete-dialog';
-import {
-  DynamicForm,
-  DynamicFormRef,
-  FormFieldConfig,
-  FormFieldType,
-} from '@/components/dynamic-form';
+import { FormFieldConfig } from '@/components/dynamic-form';
 import { BlockButton, Button } from '@/components/ui/button';
-import { Modal } from '@/components/ui/modal/modal';
 import {
   Sheet,
   SheetContent,
@@ -19,117 +13,65 @@ import { GlobalVariableType } from '@/interfaces/database/agent';
 import { cn } from '@/lib/utils';
 import { t } from 'i18next';
 import { Trash2 } from 'lucide-react';
-import { useEffect, useRef, useState } from 'react';
+import { useState } from 'react';
 import { FieldValues } from 'react-hook-form';
 import { useSaveGraph } from '../hooks/use-save-graph';
+import { AddVariableModal } from './component/add-variable-modal';
 import {
-  GobalFormFields,
-  GobalVariableFormDefaultValues,
+  GlobalFormFields,
+  GlobalVariableFormDefaultValues,
   TypeMaps,
   TypesWithArray,
-} from './contant';
+} from './constant';
+import { useObjectFields } from './hooks/use-object-fields';
 
-export type IGobalParamModalProps = {
+export type IGlobalParamModalProps = {
   data: any;
   hideModal: (open: boolean) => void;
 };
-export const GobalParamSheet = (props: IGobalParamModalProps) => {
+export const GlobalParamSheet = (props: IGlobalParamModalProps) => {
   const { hideModal } = props;
   const { data, refetch } = useFetchAgent();
-  const [fields, setFields] = useState<FormFieldConfig[]>(GobalFormFields);
   const { visible, showModal, hideModal: hideAddModal } = useSetModalState();
+  const [fields, setFields] = useState<FormFieldConfig[]>(GlobalFormFields);
   const [defaultValues, setDefaultValues] = useState<FieldValues>(
-    GobalVariableFormDefaultValues,
+    GlobalVariableFormDefaultValues,
   );
-  const formRef = useRef<DynamicFormRef>(null);
+  const { handleCustomValidate, handleCustomSchema, handleRender } =
+    useObjectFields();
+  const { saveGraph } = useSaveGraph();
 
-  const handleFieldUpdate = (
-    fieldName: string,
-    updatedField: Partial<FormFieldConfig>,
-  ) => {
-    setFields((prevFields) =>
-      prevFields.map((field) =>
-        field.name === fieldName ? { ...field, ...updatedField } : field,
-      ),
-    );
-  };
-
-  useEffect(() => {
-    const typefileld = fields.find((item) => item.name === 'type');
-
-    if (typefileld) {
-      typefileld.onChange = (value) => {
-        // setWatchType(value);
-        handleFieldUpdate('value', {
-          type: TypeMaps[value as keyof typeof TypeMaps],
-        });
-        const values = formRef.current?.getValues();
-        setTimeout(() => {
-          switch (value) {
-            case TypesWithArray.Boolean:
-              setDefaultValues({ ...values, value: false });
-              break;
-            case TypesWithArray.Number:
-              setDefaultValues({ ...values, value: 0 });
-              break;
-            default:
-              setDefaultValues({ ...values, value: '' });
-          }
-        }, 0);
-      };
-    }
-  }, [fields]);
-
-  const { saveGraph, loading } = useSaveGraph();
-
-  const handleSubmit = async (value: FieldValues) => {
-    const param = {
-      ...(data.dsl?.variables || {}),
-      [value.name]: value,
-    } as Record<string, GlobalVariableType>;
-
-    const res = await saveGraph(undefined, {
-      gobalVariables: param,
-    });
-
-    if (res.code === 0) {
-      refetch();
-    }
-    hideAddModal();
-  };
-
-  const handleDeleteGobalVariable = async (key: string) => {
+  const handleDeleteGlobalVariable = async (key: string) => {
     const param = {
       ...(data.dsl?.variables || {}),
     } as Record<string, GlobalVariableType>;
     delete param[key];
     const res = await saveGraph(undefined, {
-      gobalVariables: param,
+      globalVariables: param,
     });
-    console.log('delete gobal variable-->', res);
     if (res.code === 0) {
       refetch();
     }
   };
 
-  const handleEditGobalVariable = (item: FieldValues) => {
-    fields.forEach((field) => {
-      if (field.name === 'value') {
-        switch (item.type) {
-          // [TypesWithArray.String]: FormFieldType.Textarea,
-          // [TypesWithArray.Number]: FormFieldType.Number,
-          // [TypesWithArray.Boolean]: FormFieldType.Checkbox,
-          case TypesWithArray.Boolean:
-            field.type = FormFieldType.Checkbox;
-            break;
-          case TypesWithArray.Number:
-            field.type = FormFieldType.Number;
-            break;
-          default:
-            field.type = FormFieldType.Textarea;
-        }
+  // Opens the modal to edit `item`. Only the `value` field receives the
+  // type-specific editor, validator and schema; all other fields have
+  // those three options cleared.
+  const handleEditGlobalVariable = (item: FieldValues) => {
+    // Build copies: `fields` can alias the shared GlobalFormFields entries,
+    // so in-place edits would leak into every later add/edit session.
+    const type = item.type as keyof typeof TypeMaps;
+    const newFields: FormFieldConfig[] = fields.map((field) => {
+      const isValue = field.name === 'value';
+      return {
+        ...field,
+        type: isValue ? TypeMaps[type] : field.type,
+        render: isValue ? handleRender(item.type) : undefined,
+        customValidate: isValue ? handleCustomValidate(item.type) : undefined,
+        schema: isValue ? handleCustomSchema(item.type) : undefined,
+      };
+    });
+    setFields(newFields);
     setDefaultValues(item);
     showModal();
   };
@@ -149,8 +91,8 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
           <div className="px-5 pb-5">
             <BlockButton
               onClick={() => {
-                setFields(GobalFormFields);
-                setDefaultValues(GobalVariableFormDefaultValues);
+                setFields(GlobalFormFields);
+                setDefaultValues(GlobalVariableFormDefaultValues);
                 showModal();
               }}
             >
@@ -167,7 +109,7 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
                     key={key}
                     className="flex items-center gap-3 min-h-14 justify-between px-5 py-3 border border-border-default rounded-lg  hover:bg-bg-card group"
                     onClick={() => {
-                      handleEditGobalVariable(item);
+                      handleEditGlobalVariable(item);
                     }}
                   >
                     <div className="flex flex-col">
@@ -177,13 +119,23 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
                           {item.type}
                         </span>
                       </div>
-                      <div>
-                        <span className="text-text-primary">{item.value}</span>
-                      </div>
+                      {![
+                        TypesWithArray.Object,
+                        TypesWithArray.ArrayObject,
+                        TypesWithArray.ArrayString,
+                        TypesWithArray.ArrayNumber,
+                        TypesWithArray.ArrayBoolean,
+                      ].includes(item.type as TypesWithArray) && (
+                        <div>
+                          <span className="text-text-primary">
+                            {item.value}
+                          </span>
+                        </div>
+                      )}
                     </div>
                     <div>
                       <ConfirmDeleteDialog
-                        onOk={() => handleDeleteGobalVariable(key)}
+                        onOk={() => handleDeleteGlobalVariable(key)}
                       >
                         <Button
                           variant={'secondary'}
@@ -201,40 +153,14 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
               })}
           </div>
         </SheetContent>
-        <Modal
-          title={t('flow.add') + t('flow.conversationVariable')}
-          open={visible}
-          onCancel={hideAddModal}
-          showfooter={false}
-        >
-          <DynamicForm.Root
-            ref={formRef}
-            fields={fields}
-            onSubmit={(data) => {
-              console.log(data);
-            }}
-            defaultValues={defaultValues}
-            onFieldUpdate={handleFieldUpdate}
-          >
-            <div className="flex items-center justify-end w-full gap-2">
-              <DynamicForm.CancelButton
-                handleCancel={() => {
-                  hideAddModal?.();
-                }}
-              />
-              <DynamicForm.SavingButton
-                submitLoading={loading || false}
-                buttonText={t('common.ok')}
-                submitFunc={(values: FieldValues) => {
-                  handleSubmit(values);
-                  // console.log(values);
-                  // console.log(nodes, edges);
-                  //   handleOk(values);
-                }}
-              />
-            </div>
-          </DynamicForm.Root>
-        </Modal>
+        <AddVariableModal
+          visible={visible}
+          hideModal={hideAddModal}
+          fields={fields}
+          setFields={setFields}
+          defaultValues={defaultValues}
+          setDefaultValues={setDefaultValues}
+        />
       </Sheet>
     </>
   );
diff --git a/web/src/pages/agent/hooks/use-build-dsl.ts b/web/src/pages/agent/hooks/use-build-dsl.ts
index 1a8569636..47ec1c225 100644
--- a/web/src/pages/agent/hooks/use-build-dsl.ts
+++ b/web/src/pages/agent/hooks/use-build-dsl.ts
@@ -4,7 +4,7 @@ import { RAGFlowNodeType } from '@/interfaces/database/flow';
 import { useCallback } from 'react';
 import { Operator } from '../constant';
 import useGraphStore from '../store';
-import { buildDslComponentsByGraph, buildDslGobalVariables } from '../utils';
+import { buildDslComponentsByGraph, buildDslGlobalVariables } from '../utils';
 
 export const useBuildDslData = () => {
   const { data } = useFetchAgent();
@@ -13,7 +13,7 @@ export const useBuildDslData = () => {
   const buildDslData = useCallback(
     (
       currentNodes?: RAGFlowNodeType[],
-      otherParam?: { gobalVariables: Record<string, GlobalVariableType> },
+      otherParam?: { globalVariables: Record<string, GlobalVariableType> },
     ) => {
       const nodesToProcess = currentNodes ?? nodes;
 
@@ -41,13 +41,13 @@ export const useBuildDslData = () => {
         data.dsl.components,
       );
 
-      const gobalVariables = buildDslGobalVariables(
+      const globalVariables = buildDslGlobalVariables(
         data.dsl,
-        otherParam?.gobalVariables,
+        otherParam?.globalVariables,
       );
       return {
         ...data.dsl,
-        ...gobalVariables,
+        ...globalVariables,
         graph: { nodes: filteredNodes, edges: filteredEdges },
         components: dslComponents,
       };
diff --git a/web/src/pages/agent/hooks/use-save-graph.ts b/web/src/pages/agent/hooks/use-save-graph.ts
index e59b99193..500baf716 100644
--- a/web/src/pages/agent/hooks/use-save-graph.ts
+++ b/web/src/pages/agent/hooks/use-save-graph.ts
@@ -21,7 +21,7 @@ export const useSaveGraph = (showMessage: boolean = true) => {
   const saveGraph = useCallback(
     async (
       currentNodes?: RAGFlowNodeType[],
-      otherParam?: { gobalVariables: Record<string, GlobalVariableType> },
+      otherParam?: { globalVariables: Record<string, GlobalVariableType> },
     ) => {
       return setAgent({
         id,
diff --git a/web/src/pages/agent/index.tsx b/web/src/pages/agent/index.tsx
index 21ecb22e7..b0d2f6f15 100644
--- a/web/src/pages/agent/index.tsx
+++ b/web/src/pages/agent/index.tsx
@@ -39,7 +39,7 @@ import { useParams } from 'umi';
 import AgentCanvas from './canvas';
 import { DropdownProvider } from './canvas/context';
 import { Operator } from './constant';
-import { GobalParamSheet } from './gobal-variable-sheet';
+import { GlobalParamSheet } from './gobal-variable-sheet';
 import { useCancelCurrentDataflow } from './hooks/use-cancel-dataflow';
 import { useHandleExportJsonFile } from './hooks/use-export-json';
 import { useFetchDataOnMount } from './hooks/use-fetch-data';
@@ -126,9 +126,9 @@ export default function Agent() {
   } = useSetModalState();
 
   const {
-    visible: gobalParamSheetVisible,
-    showModal: showGobalParamSheet,
-    hideModal: hideGobalParamSheet,
+    visible: globalParamSheetVisible,
+    showModal: showGlobalParamSheet,
+    hideModal: hideGlobalParamSheet,
   } = useSetModalState();
 
   const {
@@ -216,7 +216,7 @@ export default function Agent() {
           </ButtonLoading>
           <ButtonLoading
             variant={'secondary'}
-            onClick={() => showGobalParamSheet()}
+            onClick={() => showGlobalParamSheet()}
             loading={loading}
           >
             <MessageSquareCode /> {t('flow.conversationVariable')}
@@ -314,11 +314,11 @@ export default function Agent() {
           loading={pipelineRunning}
         ></PipelineRunSheet>
       )}
-      {gobalParamSheetVisible && (
-        <GobalParamSheet
+      {globalParamSheetVisible && (
+        <GlobalParamSheet
           data={{}}
-          hideModal={hideGobalParamSheet}
-        ></GobalParamSheet>
+          hideModal={hideGlobalParamSheet}
+        ></GlobalParamSheet>
       )}
     </section>
   );
diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts
index 487067ed8..3312b7236 100644
--- a/web/src/pages/agent/utils.ts
+++ b/web/src/pages/agent/utils.ts
@@ -348,30 +348,30 @@ export const buildDslComponentsByGraph = (
   return components;
 };
 
-export const buildDslGobalVariables = (
+export const buildDslGlobalVariables = (
   dsl: DSL,
-  gobalVariables?: Record<string, GlobalVariableType>,
+  globalVariables?: Record<string, GlobalVariableType>,
 ) => {
-  if (!gobalVariables) {
+  if (!globalVariables) {
     return { globals: dsl.globals, variables: dsl.variables || {} };
   }
 
-  let gobalVariablesTemp: Record<string, any> = {};
-  let gobalSystem: Record<string, any> = {};
-  Object.keys(dsl.globals)?.forEach((key) => {
-    if (key.indexOf('sys') > -1) {
-      gobalSystem[key] = dsl.globals[key];
+  let globalVariablesTemp: Record<string, any> = {};
+  let globalSystem: Record<string, any> = {};
+  Object.keys(dsl.globals ?? {}).forEach((key) => {
+    if (key.startsWith('sys.')) { // only true system keys; an 'env.*' name may merely contain "sys"
+      globalSystem[key] = dsl.globals[key];
     }
   });
-  Object.keys(gobalVariables).forEach((key) => {
-    gobalVariablesTemp['env.' + key] = gobalVariables[key].value;
+  Object.keys(globalVariables).forEach((key) => {
+    globalVariablesTemp['env.' + key] = globalVariables[key].value;
   });
 
-  const gobalVariablesResult = {
-    ...gobalSystem,
-    ...gobalVariablesTemp,
+  const globalVariablesResult = {
+    ...globalSystem,
+    ...globalVariablesTemp,
   };
-  return { globals: gobalVariablesResult, variables: gobalVariables };
+  return { globals: globalVariablesResult, variables: globalVariables };
 };
 
 export const receiveMessageError = (res: any) =>
diff --git a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx
index c63309c50..c6d18af13 100644
--- a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx
+++ b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx
@@ -7,11 +7,14 @@ import {
   FormMessage,
 } from '@/components/ui/form';
 import { Radio } from '@/components/ui/radio';
+import { Spin } from '@/components/ui/spin';
 import { Switch } from '@/components/ui/switch';
 import { useTranslate } from '@/hooks/common-hooks';
 import { cn } from '@/lib/utils';
+import { useMemo, useState } from 'react';
 import { useFormContext } from 'react-hook-form';
 import {
+  useHandleKbEmbedding,
   useHasParsedDocument,
   useSelectChunkMethodList,
   useSelectEmbeddingModelOptions,
@@ -62,11 +65,17 @@ export function ChunkMethodItem(props: IProps) {
     />
   );
 }
-export function EmbeddingModelItem({ line = 1, isEdit = true }: IProps) {
+export function EmbeddingModelItem({ line = 1, isEdit }: IProps) {
   const { t } = useTranslate('knowledgeConfiguration');
   const form = useFormContext();
   const embeddingModelOptions = useSelectEmbeddingModelOptions();
+  const { handleChange } = useHandleKbEmbedding();
   const disabled = useHasParsedDocument(isEdit);
+  const oldValue = useMemo(() => {
+    const embdStr = form.getValues('embd_id');
+    return embdStr || '';
+  }, [form]);
+  const [loading, setLoading] = useState(false);
   return (
     <>
       <FormField
@@ -93,14 +102,33 @@ export function EmbeddingModelItem({ line = 1, isEdit = true }: IProps) {
                 className={cn('text-muted-foreground', { 'w-3/4': line === 1 })}
               >
                 <FormControl>
-                  <SelectWithSearch
-                    onChange={field.onChange}
-                    value={field.value}
-                    options={embeddingModelOptions}
-                    disabled={isEdit ? disabled : false}
-                    placeholder={t('embeddingModelPlaceholder')}
-                    triggerClassName="!bg-bg-base"
-                  />
+                  <Spin
+                    spinning={loading}
+                    className={cn(' rounded-lg after:bg-bg-base', {
+                      'opacity-20': loading,
+                    })}
+                  >
+                    <SelectWithSearch
+                      onChange={async (value) => {
+                        const previous = field.value ?? oldValue; // revert target: the value selected before this change
+                        field.onChange(value);
+                        if (!(isEdit && disabled)) return;
+                        setLoading(true);
+                        try {
+                          const res = await handleChange({ embed_id: value });
+                          if (res?.code !== 0) field.onChange(previous);
+                        } catch {
+                          field.onChange(previous); // request rejected: restore the previous model
+                        } finally {
+                          setLoading(false); // never leave the spinner on, even when the check throws
+                        }
+                      }}
+                      value={field.value}
+                      options={embeddingModelOptions}
+                      placeholder={t('embeddingModelPlaceholder')}
+                      triggerClassName="!bg-bg-base"
+                    />
+                  </Spin>
                 </FormControl>
               </div>
             </div>
diff --git a/web/src/pages/dataset/dataset-setting/general-form.tsx b/web/src/pages/dataset/dataset-setting/general-form.tsx
index b4a7b9635..110c03a3e 100644
--- a/web/src/pages/dataset/dataset-setting/general-form.tsx
+++ b/web/src/pages/dataset/dataset-setting/general-form.tsx
@@ -88,7 +88,7 @@ export function GeneralForm() {
         }}
       />
       <PermissionFormField></PermissionFormField>
-      <EmbeddingModelItem></EmbeddingModelItem>
+      <EmbeddingModelItem isEdit={true}></EmbeddingModelItem>
       <PageRankFormField></PageRankFormField>
 
       <TagItems></TagItems>
diff --git a/web/src/pages/dataset/dataset-setting/hooks.ts b/web/src/pages/dataset/dataset-setting/hooks.ts
index 605f91e4d..f9efe1d08 100644
--- a/web/src/pages/dataset/dataset-setting/hooks.ts
+++ b/web/src/pages/dataset/dataset-setting/hooks.ts
@@ -4,10 +4,12 @@ import { useSetModalState } from '@/hooks/common-hooks';
 import { useSelectLlmOptionsByModelType } from '@/hooks/llm-hooks';
 import { useFetchKnowledgeBaseConfiguration } from '@/hooks/use-knowledge-request';
 import { useSelectParserList } from '@/hooks/user-setting-hooks';
+import kbService from '@/services/knowledge-service';
 import { useIsFetching } from '@tanstack/react-query';
 import { pick } from 'lodash';
 import { useCallback, useEffect, useState } from 'react';
 import { UseFormReturn } from 'react-hook-form';
+import { useParams, useSearchParams } from 'umi';
 import { z } from 'zod';
 import { formSchema } from './form-schema';
 
@@ -98,3 +100,22 @@ export const useRenameKnowledgeTag = () => {
     showTagRenameModal: handleShowTagRenameModal,
   };
 };
+
+export const useHandleKbEmbedding = () => {
+  const { id } = useParams();
+  const [searchParams] = useSearchParams();
+  const knowledgeBaseId = searchParams.get('id') || id;
+  const handleChange = useCallback(
+    async ({ embed_id }: { embed_id: string }) => {
+      const res = await kbService.checkEmbedding({
+        kb_id: knowledgeBaseId,
+        embd_id: embed_id,
+      });
+      return res.data;
+    },
+    [knowledgeBaseId],
+  );
+  return {
+    handleChange,
+  };
+};
diff --git a/web/src/services/knowledge-service.ts b/web/src/services/knowledge-service.ts
index 350fa4e2a..01b8da127 100644
--- a/web/src/services/knowledge-service.ts
+++ b/web/src/services/knowledge-service.ts
@@ -47,6 +47,7 @@ const {
   traceGraphRag,
   runRaptor,
   traceRaptor,
+  check_embedding,
 } = api;
 
 const methods = {
@@ -214,6 +215,11 @@ const methods = {
     url: api.pipelineRerun,
     method: 'post',
   },
+
+  checkEmbedding: {
+    url: check_embedding,
+    method: 'post',
+  },
 };
 
 const kbService = registerServer<keyof typeof methods>(methods, request);
diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts
index 0d97801ac..e0afdbeb3 100644
--- a/web/src/utils/api.ts
+++ b/web/src/utils/api.ts
@@ -49,6 +49,8 @@ export default {
   llm_tools: `${api_host}/plugin/llm_tools`,
 
   // knowledge base
+
+  check_embedding: `${api_host}/kb/check_embedding`,
   kb_list: `${api_host}/kb/list`,
   create_kb: `${api_host}/kb/create`,
   update_kb: `${api_host}/kb/update`,

From 5f59418ababc619aa61244dba6772dca424c507b Mon Sep 17 00:00:00 2001
From: redredrrred <1589289338@qq.com>
Date: Fri, 14 Nov 2025 13:59:03 +0800
Subject: [PATCH 02/15] Remove leftover account and password from the code
 (#11248)

Remove legacy accounts and passwords.

### What problem does this PR solve?

Remove leftover account and password in
agent/templates/sql_assistant.json

### Type of change

- [x] Other (please describe):
---
 agent/templates/sql_assistant.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/agent/templates/sql_assistant.json b/agent/templates/sql_assistant.json
index 92804abc6..6e7140196 100644
--- a/agent/templates/sql_assistant.json
+++ b/agent/templates/sql_assistant.json
@@ -83,10 +83,10 @@
                                     "value": []
                                 }
                             },
-                            "password": "20010812Yy!",
+                            "password": "",
                             "port": 3306,
                             "sql": "{Agent:WickedGoatsDivide@content}",
-                            "username": "13637682833@163.com"
+                            "username": ""
                         }
                     },
                     "upstream": [
@@ -527,10 +527,10 @@
                                         "value": []
                                     }
                                 },
-                                "password": "20010812Yy!",
+                                "password": "",
                                 "port": 3306,
                                 "sql": "{Agent:WickedGoatsDivide@content}",
-                                "username": "13637682833@163.com"
+                                "username": ""
                             },
                             "label": "ExeSQL",
                             "name": "ExeSQL"

From e27ff8d3d42ce726941f8494a1a428ebe76587de Mon Sep 17 00:00:00 2001
From: Billy Bao <newyorkupperbay@gmail.com>
Date: Fri, 14 Nov 2025 13:59:54 +0800
Subject: [PATCH 03/15] Fix: rerank algorithm (#11266)

### What problem does this PR solve?

Fix: rerank algorithm #11234

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 rag/nlp/search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index f8b3d513f..4dbd9945c 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -347,7 +347,7 @@ class Dealer:
         ## For rank feature(tag_fea) scores.
         rank_fea = self._rank_feature_scores(rank_feature, sres)
 
-        return tkweight * (np.array(tksim)+rank_fea) + vtweight * vtsim, tksim, vtsim
+        return tkweight * np.array(tksim) + vtweight * vtsim + rank_fea, tksim, vtsim
 
     def hybrid_similarity(self, ans_embd, ins_embd, ans, inst):
         return self.qryr.hybrid_similarity(ans_embd,

From b5f2cf16bcad7b1f9f9f10ff11323352680d02ff Mon Sep 17 00:00:00 2001
From: Lynn <lynn_inf@hotmail.com>
Date: Fri, 14 Nov 2025 15:52:28 +0800
Subject: [PATCH 04/15] Fix: check task executor alive and display status
 (#11270)

### What problem does this PR solve?

Correctly check whether task executors are alive and display their status.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 admin/client/admin_client.py | 9 ++++++---
 api/utils/health_utils.py    | 3 ++-
 rag/utils/redis_conn.py      | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/admin/client/admin_client.py b/admin/client/admin_client.py
index b52e67494..0d04cb3b2 100644
--- a/admin/client/admin_client.py
+++ b/admin/client/admin_client.py
@@ -393,7 +393,9 @@ class AdminCLI(Cmd):
                 print(f"Can't access {self.host}, port: {self.port}")
 
     def _format_service_detail_table(self, data):
-        if not any([isinstance(v, list) for v in data.values()]):
+        if isinstance(data, list):
+            return data
+        if not all([isinstance(v, list) for v in data.values()]):
             # normal table
             return data
         # handle task_executor heartbeats map, for example {'name': [{'done': 2, 'now': timestamp1}, {'done': 3, 'now': timestamp2}]
@@ -404,7 +406,7 @@ class AdminCLI(Cmd):
             task_executor_list.append({
                 "task_executor_name": k,
                 **heartbeats[0],
-            })
+            } if heartbeats else {"task_executor_name": k})
         return task_executor_list
 
     def _print_table_simple(self, data):
@@ -415,7 +417,8 @@ class AdminCLI(Cmd):
             # handle single row data
             data = [data]
 
-        columns = list(data[0].keys())
+        columns = list(set().union(*(d.keys() for d in data)))
+        columns.sort()
         col_widths = {}
 
         def get_string_width(text):
diff --git a/api/utils/health_utils.py b/api/utils/health_utils.py
index 88e5aaebb..0a7ab6e7a 100644
--- a/api/utils/health_utils.py
+++ b/api/utils/health_utils.py
@@ -173,7 +173,8 @@ def check_task_executor_alive():
             heartbeats = [json.loads(heartbeat) for heartbeat in heartbeats]
             task_executor_heartbeats[task_executor_id] = heartbeats
         if task_executor_heartbeats:
-            return {"status": "alive", "message": task_executor_heartbeats}
+            status = "alive" if any(task_executor_heartbeats.values()) else "timeout"
+            return {"status": status, "message": task_executor_heartbeats}
         else:
             return {"status": "timeout", "message": "Not found any task executor."}
     except Exception as e:
diff --git a/rag/utils/redis_conn.py b/rag/utils/redis_conn.py
index 58b0fe15b..a8bc43b57 100644
--- a/rag/utils/redis_conn.py
+++ b/rag/utils/redis_conn.py
@@ -110,7 +110,7 @@ class RedisDB:
         info = self.REDIS.info()
         return {
             'redis_version': info["redis_version"],
-            'server_mode': info["server_mode"],
+            'server_mode': info["server_mode"] if "server_mode" in info else info.get("redis_mode", ""),
             'used_memory': info["used_memory_human"],
             'total_system_memory': info["total_system_memory_human"],
             'mem_fragmentation_ratio': info["mem_fragmentation_ratio"],

From 12db62b9c736c8b9efba3ef58fa7151a0c50099b Mon Sep 17 00:00:00 2001
From: Stephen Hu <812791840@qq.com>
Date: Fri, 14 Nov 2025 16:32:35 +0800
Subject: [PATCH 05/15] Refactor: improve mineru_parser get property logic
 (#11268)

### What problem does this PR solve?

Improve how mineru_parser reads optional output properties (use `dict.get` with defaults).

### Type of change

- [x] Refactoring
---
 deepdoc/parser/mineru_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py
index 3d4c9f149..bb663de0d 100644
--- a/deepdoc/parser/mineru_parser.py
+++ b/deepdoc/parser/mineru_parser.py
@@ -434,7 +434,7 @@ class MinerUParser(RAGFlowPdfParser):
                     if not section.strip():
                         section = "FAILED TO PARSE TABLE"
                 case MinerUContentType.IMAGE:
-                    section = "".join(output["image_caption"]) + "\n" + "".join(output["image_footnote"])
+                    section = "".join(output.get("image_caption", [])) + "\n" + "".join(output.get("image_footnote", []))  # either key may be absent
                 case MinerUContentType.EQUATION:
                     section = output["text"]
                 case MinerUContentType.CODE:

From db4fd19c8269a64f8d213a64c37f41e5325f22cd Mon Sep 17 00:00:00 2001
From: buua436 <66937541+buua436@users.noreply.github.com>
Date: Fri, 14 Nov 2025 16:33:20 +0800
Subject: [PATCH 06/15] Feat:new component list operations (#11276)

### What problem does this PR solve?
issue:
https://github.com/infiniflow/ragflow/issues/10427
change:
new component list operations

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 agent/component/list_operations.py            | 149 ++++++++++++++++++
 web/src/constants/agent.tsx                   |   1 +
 web/src/locales/en.ts                         |  15 ++
 web/src/locales/zh.ts                         |  15 ++
 web/src/pages/agent/canvas/index.tsx          |   2 +
 .../node/dropdown/accordion-operators.tsx     |   1 +
 .../canvas/node/list-operations-node.tsx      |  22 +++
 web/src/pages/agent/constant/index.tsx        |  31 ++++
 .../agent/form-sheet/form-config-map.tsx      |   4 +
 .../agent/form/list-operations-form/index.tsx | 140 ++++++++++++++++
 web/src/pages/agent/hooks/use-add-node.ts     |   2 +
 web/src/pages/agent/operator-icon.tsx         |   3 +-
 web/src/pages/agent/utils.ts                  |   1 -
 13 files changed, 384 insertions(+), 2 deletions(-)
 create mode 100644 agent/component/list_operations.py
 create mode 100644 web/src/pages/agent/canvas/node/list-operations-node.tsx
 create mode 100644 web/src/pages/agent/form/list-operations-form/index.tsx

diff --git a/agent/component/list_operations.py b/agent/component/list_operations.py
new file mode 100644
index 000000000..c29d79ea6
--- /dev/null
+++ b/agent/component/list_operations.py
@@ -0,0 +1,149 @@
+from abc import ABC
+import os
+from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.api_utils import timeout
+
+class ListOperationsParam(ComponentParamBase):
+    """
+    Define the List Operations component parameters.
+    """
+    def __init__(self):
+        super().__init__()
+        self.query = ""
+        self.operations = "topN"
+        self.n=0
+        self.sort_method = "asc"
+        self.filter = {
+            "operator": "=",
+            "value": ""
+        }
+        self.outputs = {
+            "result": {
+                "value": [],
+                "type": "Array of ?"
+            },
+            "first": {
+                "value": "",
+                "type": "?"
+            },
+            "last": {
+                "value": "",
+                "type": "?"
+            }
+        }
+    
+    def check(self):
+        self.check_empty(self.query, "query")
+        self.check_valid_value(self.operations, "Support operations", ["topN","head","tail","filter","sort","drop_duplicates"])
+
+    def get_input_form(self) -> dict[str, dict]:
+        return {}
+    
+
+class ListOperations(ComponentBase,ABC):
+    component_name = "ListOperations"
+
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
+    def _invoke(self, **kwargs):
+        self.input_objects=[]
+        inputs = getattr(self._param, "query", None)
+        self.inputs=self._canvas.get_variable_value(inputs)
+        self.set_input_value(inputs, self.inputs)
+        if self._param.operations == "topN":
+            self._topN()
+        elif self._param.operations == "head":
+            self._head()
+        elif self._param.operations == "tail":
+            self._tail()
+        elif self._param.operations == "filter":
+            self._filter()
+        elif self._param.operations == "sort":
+            self._sort()
+        elif self._param.operations == "drop_duplicates":
+            self._drop_duplicates()
+
+
+    def _coerce_n(self):
+        try:
+            return int(getattr(self._param, "n", 0))
+        except Exception:
+            return 0
+        
+    def _set_outputs(self, outputs):
+        self._param.outputs["result"]["value"] = outputs
+        self._param.outputs["first"]["value"] = outputs[0] if outputs else None
+        self._param.outputs["last"]["value"]  = outputs[-1] if outputs else None
+        
+    def _topN(self):
+        n = self._coerce_n()
+        if n < 1:
+            outputs = []
+        else:
+            n = min(n, len(self.inputs))
+            outputs = self.inputs[:n]
+        self._set_outputs(outputs)
+
+    def _head(self):
+        n = self._coerce_n()
+        if 1 <= n <= len(self.inputs):
+            outputs = [self.inputs[n - 1]]
+        else:
+            outputs = []
+        self._set_outputs(outputs)
+
+    def _tail(self):
+        n = self._coerce_n()
+        if 1 <= n <= len(self.inputs):
+            outputs = [self.inputs[-n]]
+        else:
+            outputs = []
+        self._set_outputs(outputs)
+
+    def _filter(self):
+        self._set_outputs([i for i in self.inputs if self._eval(self._norm(i),self._param.filter["operator"],self._param.filter["value"])])
+
+    def _norm(self,v):
+        s = "" if v is None else str(v)
+        return s
+    
+    def _eval(self, v, operator, value):
+        value = "" if value is None else str(value)  # v arrives as str (via _norm); compare like with like
+        if operator == "=":
+            return v == value
+        elif operator == "≠":
+            return v != value
+        elif operator == "contains":
+            return value in v
+        elif operator == "start with":
+            return v.startswith(value)
+        elif operator == "end with":
+            return v.endswith(value)
+        return False
+
+    def _sort(self):
+        if self._param.sort_method == "asc":
+            self._set_outputs(sorted(self.inputs))
+        elif self._param.sort_method == "desc":
+            self._set_outputs(sorted(self.inputs, reverse=True))
+
+    def _drop_duplicates(self):
+        seen = set()
+        outs = []
+        for item in self.inputs:
+            k = self._hashable(item)
+            if k in seen:
+                continue
+            seen.add(k)
+            outs.append(item)
+        self._set_outputs(outs)
+
+    def _hashable(self,x):
+        if isinstance(x, dict):
+            return tuple(sorted((k, self._hashable(v)) for k, v in x.items()))
+        if isinstance(x, (list, tuple)):
+            return tuple(self._hashable(v) for v in x)
+        if isinstance(x, set):
+            return tuple(sorted(self._hashable(v) for v in x))
+        return x
+    def thoughts(self) -> str:
+        return "ListOperation in progress"
diff --git a/web/src/constants/agent.tsx b/web/src/constants/agent.tsx
index 6ee8ab516..3a8411ce3 100644
--- a/web/src/constants/agent.tsx
+++ b/web/src/constants/agent.tsx
@@ -109,6 +109,7 @@ export enum Operator {
   SearXNG = 'SearXNG',
   Placeholder = 'Placeholder',
   DataOperations = 'DataOperations',
+  ListOperations = 'ListOperations',
   VariableAssigner = 'VariableAssigner',
   VariableAggregator = 'VariableAggregator',
   File = 'File', // pipeline
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 9a0569ab5..b9f374f7c 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -1591,6 +1591,8 @@ This delimiter is used to split the input text into several text pieces echo of
       codeDescription: 'It allows developers to write custom Python logic.',
       dataOperations: 'Data operations',
       dataOperationsDescription: 'Perform various operations on a Data object.',
+      listOperations: 'List operations',
+      listOperationsDescription: 'Perform operations on a list.',
       variableAssigner: 'Variable assigner',
       variableAssignerDescription:
         'This component performs operations on Data objects, including extracting, filtering, and editing keys and values in the Data.',
@@ -1806,6 +1808,19 @@ Important structured information may include: names, dates, locations, events, k
         removeKeys: 'Remove keys',
         renameKeys: 'Rename keys',
       },
+      ListOperationsOptions: {
+        topN: 'Top N',
+        head: 'Head',
+        tail: 'Tail',
+        sort: 'Sort',
+        filter: 'Filter',
+        dropDuplicates: 'Drop duplicates',
+      },
+      sortMethod: 'Sort method',
+      SortMethodOptions: {
+        asc: 'Ascending',
+        desc: 'Descending',
+      },
     },
     llmTools: {
       bad_calculator: {
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index c065986f2..ce21c5a30 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -1508,6 +1508,8 @@ General：实体和关系提取提示来自 GitHub - microsoft/graphrag：基于
       codeDescription: '它允许开发人员编写自定义 Python 逻辑。',
       dataOperations: '数据操作',
       dataOperationsDescription: '对数据对象执行各种操作。',
+      listOperations: '列表操作',
+      listOperationsDescription: '对列表对象执行各种操作。',
       variableAssigner: '变量赋值器',
       variableAssignerDescription:
         '此组件对数据对象执行操作，包括提取、筛选和编辑数据中的键和值。',
@@ -1679,6 +1681,19 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
         removeKeys: '删除键',
         renameKeys: '重命名键',
       },
+      ListOperationsOptions: {
+        topN: '取前N项',
+        head: '取前第N项',
+        tail: '取后第N项',
+        sort: '排序',
+        filter: '筛选',
+        dropDuplicates: '去重',
+      },
+      sortMethod: '排序方式',
+      SortMethodOptions: {
+        asc: '升序',
+        desc: '降序',
+      },
     },
     footer: {
       profile: 'All rights reserved @ React',
diff --git a/web/src/pages/agent/canvas/index.tsx b/web/src/pages/agent/canvas/index.tsx
index 5f78e8185..f2fc983e2 100644
--- a/web/src/pages/agent/canvas/index.tsx
+++ b/web/src/pages/agent/canvas/index.tsx
@@ -61,6 +61,7 @@ import { FileNode } from './node/file-node';
 import { InvokeNode } from './node/invoke-node';
 import { IterationNode, IterationStartNode } from './node/iteration-node';
 import { KeywordNode } from './node/keyword-node';
+import { ListOperationsNode } from './node/list-operations-node';
 import { MessageNode } from './node/message-node';
 import NoteNode from './node/note-node';
 import ParserNode from './node/parser-node';
@@ -101,6 +102,7 @@ export const nodeTypes: NodeTypes = {
   splitterNode: SplitterNode,
   contextNode: ExtractorNode,
   dataOperationsNode: DataOperationsNode,
+  listOperationsNode: ListOperationsNode,
   variableAssignerNode: VariableAssignerNode,
   variableAggregatorNode: VariableAggregatorNode,
 };
diff --git a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx
index 232ab78ff..8fd96f55f 100644
--- a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx
+++ b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx
@@ -79,6 +79,7 @@ export function AccordionOperators({
               Operator.Code,
               Operator.StringTransform,
               Operator.DataOperations,
+              Operator.ListOperations,
               // Operator.VariableAssigner,
               Operator.VariableAggregator,
             ]}
diff --git a/web/src/pages/agent/canvas/node/list-operations-node.tsx b/web/src/pages/agent/canvas/node/list-operations-node.tsx
new file mode 100644
index 000000000..5b2778c92
--- /dev/null
+++ b/web/src/pages/agent/canvas/node/list-operations-node.tsx
@@ -0,0 +1,22 @@
+import { BaseNode } from '@/interfaces/database/agent';
+import { NodeProps } from '@xyflow/react';
+import { camelCase } from 'lodash';
+import { useTranslation } from 'react-i18next';
+import { RagNode } from '.';
+import { ListOperationsFormSchemaType } from '../../form/list-operations-form';
+import { LabelCard } from './card';
+
+/** Canvas node showing the translated label of the chosen list operation. */
+export function ListOperationsNode(
+  props: NodeProps<BaseNode<ListOperationsFormSchemaType>>,
+) {
+  const { t } = useTranslation();
+  // camelCase maps values such as `drop_duplicates` onto the locale keys.
+  const labelKey = camelCase(props.data.form?.operations);
+
+  return (
+    <RagNode {...props}>
+      <LabelCard>{t(`flow.ListOperationsOptions.${labelKey}`)}</LabelCard>
+    </RagNode>
+  );
+}
diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx
index 45341abf4..7aad5e4a3 100644
--- a/web/src/pages/agent/constant/index.tsx
+++ b/web/src/pages/agent/constant/index.tsx
@@ -595,6 +595,35 @@ export const initialDataOperationsValues = {
     },
   },
 };
+export enum SortMethod { // directions offered for the `sort` list operation
+  Asc = 'asc', // ascending
+  Desc = 'desc', // descending
+}
+
+export enum ListOperations { // values stored in the form's `operations` field
+  TopN = 'topN',
+  Head = 'head',
+  Tail = 'tail',
+  Filter = 'filter',
+  Sort = 'sort',
+  DropDuplicates = 'drop_duplicates', // snake_case value; its locale key is `dropDuplicates`
+}
+
+export const initialListOperationsValues = { // initial form values for a ListOperations node
+  query: '', // filled through the form's QueryVariable input
+  operations: ListOperations.TopN, // operation selected by default
+  outputs: { // also used to build the form's output list
+    result: {
+      type: 'Array<?>',
+    },
+    first: {
+      type: '?',
+    },
+    last: {
+      type: '?',
+    },
+  },
+};
 
 export const initialVariableAssignerValues = {};
 
@@ -673,6 +702,7 @@ export const RestrictedUpstreamMap = {
   [Operator.Tool]: [Operator.Begin],
   [Operator.Placeholder]: [Operator.Begin],
   [Operator.DataOperations]: [Operator.Begin],
+  [Operator.ListOperations]: [Operator.Begin],
   [Operator.Parser]: [Operator.Begin], // pipeline
   [Operator.Splitter]: [Operator.Begin],
   [Operator.HierarchicalMerger]: [Operator.Begin],
@@ -729,6 +759,7 @@ export const NodeMap = {
   [Operator.HierarchicalMerger]: 'splitterNode',
   [Operator.Extractor]: 'contextNode',
   [Operator.DataOperations]: 'dataOperationsNode',
+  [Operator.ListOperations]: 'listOperationsNode',
   [Operator.VariableAssigner]: 'variableAssignerNode',
   [Operator.VariableAggregator]: 'variableAggregatorNode',
 };
diff --git a/web/src/pages/agent/form-sheet/form-config-map.tsx b/web/src/pages/agent/form-sheet/form-config-map.tsx
index c291e4e05..37ab4cf2f 100644
--- a/web/src/pages/agent/form-sheet/form-config-map.tsx
+++ b/web/src/pages/agent/form-sheet/form-config-map.tsx
@@ -21,6 +21,7 @@ import IterationForm from '../form/iteration-form';
 import IterationStartForm from '../form/iteration-start-from';
 import Jin10Form from '../form/jin10-form';
 import KeywordExtractForm from '../form/keyword-extract-form';
+import ListOperationsForm from '../form/list-operations-form';
 import MessageForm from '../form/message-form';
 import ParserForm from '../form/parser-form';
 import PubMedForm from '../form/pubmed-form';
@@ -184,6 +185,9 @@ export const FormConfigMap = {
   [Operator.DataOperations]: {
     component: DataOperationsForm,
   },
+  [Operator.ListOperations]: {
+    component: ListOperationsForm,
+  },
   [Operator.VariableAssigner]: {
     component: VariableAssignerForm,
   },
diff --git a/web/src/pages/agent/form/list-operations-form/index.tsx b/web/src/pages/agent/form/list-operations-form/index.tsx
new file mode 100644
index 000000000..5803fe055
--- /dev/null
+++ b/web/src/pages/agent/form/list-operations-form/index.tsx
@@ -0,0 +1,140 @@
+import NumberInput from '@/components/originui/number-input';
+import { SelectWithSearch } from '@/components/originui/select-with-search';
+import { RAGFlowFormItem } from '@/components/ragflow-form';
+import {
+  Form,
+  FormControl,
+  FormField,
+  FormItem,
+  FormLabel,
+  FormMessage,
+} from '@/components/ui/form';
+import { Separator } from '@/components/ui/separator';
+import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options';
+import { buildOptions } from '@/utils/form';
+import { zodResolver } from '@hookform/resolvers/zod';
+import { memo } from 'react';
+import { useForm, useWatch } from 'react-hook-form';
+import { useTranslation } from 'react-i18next';
+import { z } from 'zod';
+import {
+  DataOperationsOperatorOptions,
+  JsonSchemaDataType,
+  ListOperations,
+  SortMethod,
+  initialListOperationsValues,
+} from '../../constant';
+import { useFormValues } from '../../hooks/use-form-values';
+import { useWatchFormChange } from '../../hooks/use-watch-form-change';
+import { INextOperatorForm } from '../../interface';
+import { buildOutputList } from '../../utils/build-output-list';
+import { FormWrapper } from '../components/form-wrapper';
+import { Output, OutputSchema } from '../components/output';
+import { PromptEditor } from '../components/prompt-editor';
+import { QueryVariable } from '../components/query-variable';
+
+export const RetrievalPartialSchema = { // zod fields of the ListOperations form. NOTE(review): the "Retrieval" name does not match this form — confirm before renaming
+  query: z.string(),
+  operations: z.string(),
+  n: z.number().int().min(0).optional(), // input rendered for topN / head / tail
+  sort_method: z.string().optional(), // input rendered for sort
+  filter: z
+    .object({ // inputs rendered for filter
+      value: z.string().optional(),
+      operator: z.string().optional(),
+    })
+    .optional(),
+  ...OutputSchema,
+};
+
+export const FormSchema = z.object(RetrievalPartialSchema);
+
+export type ListOperationsFormSchemaType = z.infer<typeof FormSchema>;
+
+const outputList = buildOutputList(initialListOperationsValues.outputs);
+
+/**
+ * Settings form for a ListOperations node; which extra inputs are rendered
+ * depends on the selected operation.
+ */
+function ListOperationsForm({ node }: INextOperatorForm) {
+  const { t } = useTranslation();
+  const initialValues = useFormValues(initialListOperationsValues, node);
+  const form = useForm<ListOperationsFormSchemaType>({
+    defaultValues: initialValues,
+    mode: 'onChange',
+    resolver: zodResolver(FormSchema),
+    shouldUnregister: true,
+  });
+  const operations = useWatch({ control: form.control, name: 'operations' });
+  const operationChoices = buildOptions(
+    ListOperations,
+    t,
+    `flow.ListOperationsOptions`,
+    true,
+  );
+  const sortChoices = buildOptions(
+    SortMethod,
+    t,
+    `flow.SortMethodOptions`,
+    true,
+  );
+  const operatorOptions = useBuildSwitchOperatorOptions(
+    DataOperationsOperatorOptions,
+  );
+  useWatchFormChange(node?.id, form, true);
+  const needsCount =
+    operations === ListOperations.TopN ||
+    operations === ListOperations.Head ||
+    operations === ListOperations.Tail;
+
+  return (
+    <Form {...form}>
+      <FormWrapper>
+        <QueryVariable
+          name="query"
+          className="flex-1"
+          types={[JsonSchemaDataType.Array]}
+        ></QueryVariable>
+        <Separator />
+        <RAGFlowFormItem name="operations" label={t('flow.operations')}>
+          <SelectWithSearch options={operationChoices} />
+        </RAGFlowFormItem>
+        {needsCount && (
+          <FormField
+            control={form.control}
+            name="n"
+            render={({ field }) => (
+              <FormItem>
+                <FormLabel>{t('flowNum')}</FormLabel>
+                <FormControl>
+                  <NumberInput {...field} className="w-full"></NumberInput>
+                </FormControl>
+                <FormMessage />
+              </FormItem>
+            )}
+          />
+        )}
+        {operations === ListOperations.Sort && (
+          <RAGFlowFormItem name="sort_method" label={t('flow.sortMethod')}>
+            <SelectWithSearch options={sortChoices} />
+          </RAGFlowFormItem>
+        )}
+        {operations === ListOperations.Filter && (
+          <div className="flex items-center gap-2">
+            <RAGFlowFormItem name="filter.operator" className="flex-1">
+              <SelectWithSearch options={operatorOptions}></SelectWithSearch>
+            </RAGFlowFormItem>
+            <Separator className="w-2" />
+            <RAGFlowFormItem name="filter.value" className="flex-1">
+              <PromptEditor showToolbar={false} multiLine={false} />
+            </RAGFlowFormItem>
+          </div>
+        )}
+        <Output list={outputList} isFormRequired></Output>
+      </FormWrapper>
+    </Form>
+  );
+}
+
+export default memo(ListOperationsForm);
diff --git a/web/src/pages/agent/hooks/use-add-node.ts b/web/src/pages/agent/hooks/use-add-node.ts
index ed092a01b..44091f1b1 100644
--- a/web/src/pages/agent/hooks/use-add-node.ts
+++ b/web/src/pages/agent/hooks/use-add-node.ts
@@ -31,6 +31,7 @@ import {
   initialIterationValues,
   initialJin10Values,
   initialKeywordExtractValues,
+  initialListOperationsValues,
   initialMessageValues,
   initialNoteValues,
   initialParserValues,
@@ -129,6 +130,7 @@ export const useInitializeOperatorParams = () => {
         prompts: t('flow.prompts.user.summary'),
       },
       [Operator.DataOperations]: initialDataOperationsValues,
+      [Operator.ListOperations]: initialListOperationsValues,
       [Operator.VariableAssigner]: initialVariableAssignerValues,
       [Operator.VariableAggregator]: initialVariableAggregatorValues,
     };
diff --git a/web/src/pages/agent/operator-icon.tsx b/web/src/pages/agent/operator-icon.tsx
index a7ece8ead..44fe9d01a 100644
--- a/web/src/pages/agent/operator-icon.tsx
+++ b/web/src/pages/agent/operator-icon.tsx
@@ -14,7 +14,7 @@ import { ReactComponent as YahooFinanceIcon } from '@/assets/svg/yahoo-finance.s
 
 import { IconFont } from '@/components/icon-font';
 import { cn } from '@/lib/utils';
-import { Equal, FileCode, HousePlus, Variable } from 'lucide-react';
+import { Columns3, Equal, FileCode, HousePlus, Variable } from 'lucide-react';
 import { Operator } from './constant';
 
 interface IProps {
@@ -57,6 +57,7 @@ export const SVGIconMap = {
 };
 export const LucideIconMap = {
   [Operator.DataOperations]: FileCode,
+  [Operator.ListOperations]: Columns3,
   [Operator.VariableAssigner]: Equal,
   [Operator.VariableAggregator]: Variable,
 };
diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts
index 3312b7236..a7d4248ff 100644
--- a/web/src/pages/agent/utils.ts
+++ b/web/src/pages/agent/utils.ts
@@ -328,7 +328,6 @@ export const buildDslComponentsByGraph = (
         case Operator.DataOperations:
           params = transformDataOperationsParams(params);
           break;
-
         default:
           break;
       }

From 996b5fe14ec40ac56deb4021111341f941862581 Mon Sep 17 00:00:00 2001
From: chanx <1243304602@qq.com>
Date: Fri, 14 Nov 2025 19:50:01 +0800
Subject: [PATCH 07/15] Fix: Added the ability to download files in the agent
 message reply function. (#11281)

### What problem does this PR solve?

Fix: Added the ability to download files in the agent message reply
function.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 .../components/next-message-item/index.tsx    | 30 +++++++++++++-
 web/src/hooks/use-send-message.ts             |  7 +++-
 web/src/interfaces/database/chat.ts           |  3 ++
 web/src/locales/en.ts                         |  2 +
 web/src/locales/zh.ts                         |  2 +
 .../agent/chat/use-send-agent-message.ts      |  8 +++-
 web/src/pages/agent/constant/index.tsx        |  8 ++++
 .../pages/agent/form/message-form/index.tsx   | 41 ++++++++++++++++++-
 .../agent/form/message-form/use-values.ts     |  3 +-
 web/src/services/file-manager-service.ts      |  7 ++++
 web/src/utils/api.ts                          |  2 +
 11 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/web/src/components/next-message-item/index.tsx b/web/src/components/next-message-item/index.tsx
index 5dd6cdf60..706553b67 100644
--- a/web/src/components/next-message-item/index.tsx
+++ b/web/src/components/next-message-item/index.tsx
@@ -18,8 +18,10 @@ import { cn } from '@/lib/utils';
 import { AgentChatContext } from '@/pages/agent/context';
 import { WorkFlowTimeline } from '@/pages/agent/log-sheet/workflow-timeline';
 import { IMessage } from '@/pages/chat/interface';
+import { downloadFile } from '@/services/file-manager-service';
+import { downloadFileFromBlob } from '@/utils/file-util';
 import { isEmpty } from 'lodash';
-import { Atom, ChevronDown, ChevronUp } from 'lucide-react';
+import { Atom, ChevronDown, ChevronUp, Download } from 'lucide-react';
 import MarkdownContent from '../next-markdown-content';
 import { RAGFlowAvatar } from '../ragflow-avatar';
 import { useTheme } from '../theme-provider';
@@ -245,6 +247,32 @@ function MessageItem({
             {isUser && (
               <UploadedMessageFiles files={item.files}></UploadedMessageFiles>
             )}
+            {isAssistant && item.attachment && item.attachment.doc_id && (
+              <div className="w-full flex items-center justify-end">
+                <Button
+                  variant="link"
+                  className="p-1 m-0 h-auto text-text-sub-title-invert"
+                  onClick={async () => {
+                    if (item.attachment?.doc_id) {
+                      try {
+                        const response = await downloadFile({
+                          docId: item.attachment.doc_id,
+                          ext: item.attachment.format,
+                        });
+                        const blob = new Blob([response.data], {
+                          type: response.data.type,
+                        });
+                        downloadFileFromBlob(blob, item.attachment.file_name);
+                      } catch (error) {
+                        console.error('Download failed:', error);
+                      }
+                    }
+                  }}
+                >
+                  <Download size={16} />
+                </Button>
+              </div>
+            )}
           </section>
         </div>
       </section>
diff --git a/web/src/hooks/use-send-message.ts b/web/src/hooks/use-send-message.ts
index 8d602f2e0..e956217f3 100644
--- a/web/src/hooks/use-send-message.ts
+++ b/web/src/hooks/use-send-message.ts
@@ -44,9 +44,14 @@ export interface IInputData {
   inputs: Record<string, BeginQuery>;
   tips: string;
 }
-
+export interface IAttachment { // downloadable file attached to an agent reply
+  doc_id: string; // id passed to the download endpoint
+  format: string; // sent as the `ext` query parameter when downloading
+  file_name: string; // name given to the downloaded file
+}
 export interface IMessageData {
   content: string;
+  outputs: any;
   start_to_think?: boolean;
   end_to_think?: boolean;
 }
diff --git a/web/src/interfaces/database/chat.ts b/web/src/interfaces/database/chat.ts
index 62bcb4696..eb6eebe89 100644
--- a/web/src/interfaces/database/chat.ts
+++ b/web/src/interfaces/database/chat.ts
@@ -1,4 +1,5 @@
 import { MessageType } from '@/constants/chat';
+import { IAttachment } from '@/hooks/use-send-message';
 
 export interface PromptConfig {
   empty_response: string;
@@ -97,6 +98,7 @@ export interface Message {
   data?: any;
   files?: File[];
   chatBoxId?: string;
+  attachment?: IAttachment;
 }
 
 export interface IReferenceChunk {
@@ -126,6 +128,7 @@ export interface IReferenceObject {
 
 export interface IAnswer {
   answer: string;
+  attachment?: IAttachment;
   reference?: IReference;
   conversationId?: string;
   prompt?: string;
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index b9f374f7c..e2035a378 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -1009,6 +1009,8 @@ Example: general/v2/`,
       pleaseUploadAtLeastOneFile: 'Please upload at least one file',
     },
     flow: {
+      downloadFileTypeTip: 'The file type to download',
+      downloadFileType: 'Download file type',
       formatTypeError: 'Format or type error',
       variableNameMessage:
         'Variable name can only contain letters and underscores',
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index ce21c5a30..301719117 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -956,6 +956,8 @@ General：实体和关系提取提示来自 GitHub - microsoft/graphrag：基于
       pleaseUploadAtLeastOneFile: '请上传至少一个文件',
     },
     flow: {
+      downloadFileTypeTip: '文件下载的类型',
+      downloadFileType: '文件类型',
       formatTypeError: '格式或类型错误',
       variableNameMessage: '名称只能包含字母和下划线',
       variableDescription: '变量的描述',
diff --git a/web/src/pages/agent/chat/use-send-agent-message.ts b/web/src/pages/agent/chat/use-send-agent-message.ts
index a0460fd71..5fc49d4ce 100644
--- a/web/src/pages/agent/chat/use-send-agent-message.ts
+++ b/web/src/pages/agent/chat/use-send-agent-message.ts
@@ -5,6 +5,7 @@ import {
   useSelectDerivedMessages,
 } from '@/hooks/logic-hooks';
 import {
+  IAttachment,
   IEventList,
   IInputEvent,
   IMessageEndData,
@@ -75,9 +76,13 @@ export function findMessageFromList(eventList: IEventList) {
     nextContent += '</think>';
   }
 
+  const workflowFinished = eventList.find(
+    (x) => x.event === MessageEventType.WorkflowFinished,
+  ) as IMessageEvent;
   return {
     id: eventList[0]?.message_id,
     content: nextContent,
+    attachment: workflowFinished?.data?.outputs?.attachment || {},
   };
 }
 
@@ -388,12 +393,13 @@ export const useSendAgentMessage = ({
   }, [sendMessageInTaskMode]);
 
   useEffect(() => {
-    const { content, id } = findMessageFromList(answerList);
+    const { content, id, attachment } = findMessageFromList(answerList);
     const inputAnswer = findInputFromList(answerList);
     const answer = content || getLatestError(answerList);
     if (answerList.length > 0) {
       addNewestOneAnswer({
         answer: answer ?? '',
+        attachment: attachment as IAttachment,
         id: id,
         ...inputAnswer,
       });
diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx
index 7aad5e4a3..3a161d87d 100644
--- a/web/src/pages/agent/constant/index.tsx
+++ b/web/src/pages/agent/constant/index.tsx
@@ -417,6 +417,7 @@ export const initialIterationValues = {
   items_ref: '',
   outputs: {},
 };
+
 export const initialIterationStartValues = {
   outputs: {
     item: {
@@ -845,3 +846,10 @@ export enum JsonSchemaDataType {
   Array = 'array',
   Object = 'object',
 }
+
+export enum ExportFileType { // choices for the Message form's `output_format` select
+  PDF = 'pdf',
+  HTML = 'html',
+  Markdown = 'md', // NOTE(review): message.py accepts "markdown", not "md"; this value only works via its markdown fallback — confirm intended
+  DOCX = 'docx',
+}
diff --git a/web/src/pages/agent/form/message-form/index.tsx b/web/src/pages/agent/form/message-form/index.tsx
index e93735ee7..31b52659e 100644
--- a/web/src/pages/agent/form/message-form/index.tsx
+++ b/web/src/pages/agent/form/message-form/index.tsx
@@ -8,12 +8,14 @@ import {
   FormLabel,
   FormMessage,
 } from '@/components/ui/form';
+import { RAGFlowSelect } from '@/components/ui/select';
 import { zodResolver } from '@hookform/resolvers/zod';
 import { X } from 'lucide-react';
 import { memo } from 'react';
 import { useFieldArray, useForm } from 'react-hook-form';
 import { useTranslation } from 'react-i18next';
 import { z } from 'zod';
+import { ExportFileType } from '../../constant';
 import { INextOperatorForm } from '../../interface';
 import { FormWrapper } from '../components/form-wrapper';
 import { PromptEditor } from '../components/prompt-editor';
@@ -33,10 +35,14 @@ function MessageForm({ node }: INextOperatorForm) {
         }),
       )
       .optional(),
+    output_format: z.string().optional(),
   });
 
   const form = useForm({
-    defaultValues: values,
+    defaultValues: {
+      ...values,
+      output_format: values.output_format,
+    },
     resolver: zodResolver(FormSchema),
   });
 
@@ -50,6 +56,39 @@ function MessageForm({ node }: INextOperatorForm) {
   return (
     <Form {...form}>
       <FormWrapper>
+        <FormContainer>
+          <FormItem>
+            <FormLabel tooltip={t('flow.downloadFileTypeTip')}>
+              {t('flow.downloadFileType')}
+            </FormLabel>
+            <FormField
+              control={form.control}
+              name={`output_format`}
+              render={({ field }) => (
+                <FormItem className="flex-1">
+                  <FormControl>
+                    <RAGFlowSelect
+                      options={Object.keys(ExportFileType).map(
+                        (key: string) => {
+                          return {
+                            value:
+                              ExportFileType[
+                                key as keyof typeof ExportFileType
+                              ],
+                            label: key,
+                          };
+                        },
+                      )}
+                      {...field}
+                      onValueChange={field.onChange}
+                      placeholder={t('flow.messagePlaceholder')}
+                    ></RAGFlowSelect>
+                  </FormControl>
+                </FormItem>
+              )}
+            />
+          </FormItem>
+        </FormContainer>
         <FormContainer>
           <FormItem>
             <FormLabel tooltip={t('flow.msgTip')}>{t('flow.msg')}</FormLabel>
diff --git a/web/src/pages/agent/form/message-form/use-values.ts b/web/src/pages/agent/form/message-form/use-values.ts
index 6a90881be..0cece91fc 100644
--- a/web/src/pages/agent/form/message-form/use-values.ts
+++ b/web/src/pages/agent/form/message-form/use-values.ts
@@ -1,7 +1,7 @@
 import { RAGFlowNodeType } from '@/interfaces/database/flow';
 import { isEmpty } from 'lodash';
 import { useMemo } from 'react';
-import { initialMessageValues } from '../../constant';
+import { ExportFileType, initialMessageValues } from '../../constant';
 import { convertToObjectArray } from '../../utils';
 
 export function useValues(node?: RAGFlowNodeType) {
@@ -15,6 +15,7 @@ export function useValues(node?: RAGFlowNodeType) {
     return {
       ...formData,
       content: convertToObjectArray(formData.content),
+      output_format: formData.output_format || ExportFileType.PDF,
     };
   }, [node]);
 
diff --git a/web/src/services/file-manager-service.ts b/web/src/services/file-manager-service.ts
index 8342117c9..8c5eb6c4e 100644
--- a/web/src/services/file-manager-service.ts
+++ b/web/src/services/file-manager-service.ts
@@ -13,6 +13,7 @@ const {
   get_document_file,
   getFile,
   moveFile,
+  get_document_file_download,
 } = api;
 
 const methods = {
@@ -65,4 +66,10 @@ const fileManagerService = registerServer<keyof typeof methods>(
   request,
 );
 
+/** Downloads a stored document as a blob; `ext` is forwarded as a query parameter. */
+export const downloadFile = ({ docId, ext }: { docId: string; ext: string }) =>
+  request.get(get_document_file_download(docId), {
+    params: { ext },
+    responseType: 'blob',
+  });
 export default fileManagerService;
diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts
index e0afdbeb3..c4ce8205f 100644
--- a/web/src/utils/api.ts
+++ b/web/src/utils/api.ts
@@ -100,6 +100,8 @@ export default {
   document_change_parser: `${api_host}/document/change_parser`,
   document_thumbnails: `${api_host}/document/thumbnails`,
   get_document_file: `${api_host}/document/get`,
+  get_document_file_download: (docId: string) =>
+    `${api_host}/document/download/${docId}`,
   document_upload: `${api_host}/document/upload`,
   web_crawl: `${api_host}/document/web_crawl`,
   document_infos: `${api_host}/document/infos`,

From cd55f6c1b822d84e23a2199ae5f71eac5671d736 Mon Sep 17 00:00:00 2001
From: buua436 <66937541+buua436@users.noreply.github.com>
Date: Fri, 14 Nov 2025 19:50:29 +0800
Subject: [PATCH 08/15] Fix:ListOperations does not support sorting arrays of
 objects. (#11278)

### What problem does this PR solve?

pr:
#11276
change:
ListOperations does not support sorting arrays of objects.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 agent/component/list_operations.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/agent/component/list_operations.py b/agent/component/list_operations.py
index c29d79ea6..9ae8c2e04 100644
--- a/agent/component/list_operations.py
+++ b/agent/component/list_operations.py
@@ -121,10 +121,26 @@ class ListOperations(ComponentBase,ABC):
             return False
 
     def _sort(self):
-        if self._param.sort_method == "asc":
-            self._set_outputs(sorted(self.inputs))
-        elif self._param.sort_method == "desc":
-            self._set_outputs(sorted(self.inputs, reverse=True))
+        items = self.inputs or []
+        method = getattr(self._param, "sort_method", "asc") or "asc"
+        reverse = method == "desc"
+
+        if not items:
+            self._set_outputs([])
+            return
+
+        first = items[0]
+
+        if isinstance(first, dict):
+            outputs = sorted(
+                items,
+                key=lambda x: self._hashable(x),
+                reverse=reverse,
+            )
+        else:
+            outputs = sorted(items, reverse=reverse)
+
+        self._set_outputs(outputs)
 
     def _drop_duplicates(self):
         seen = set()
@@ -145,5 +161,6 @@ class ListOperations(ComponentBase,ABC):
         if isinstance(x, set):
             return tuple(sorted(self._hashable(v) for v in x))
         return x
+    
     def thoughts(self) -> str:
         return "ListOperation in progress"

From 68e3b33ae4b8043620d2ea901174ec75054b1a15 Mon Sep 17 00:00:00 2001
From: Billy Bao <newyorkupperbay@gmail.com>
Date: Fri, 14 Nov 2025 19:52:11 +0800
Subject: [PATCH 09/15] Feat: extract message output to file (#11251)

### What problem does this PR solve?

Feat: extract message output to file

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 Dockerfile                 |  4 ++-
 agent/canvas.py            |  4 +++
 agent/component/message.py | 70 +++++++++++++++++++++++++++++++++++++-
 api/apps/document_app.py   | 17 +++++++++
 pyproject.toml             |  1 +
 uv.lock                    | 10 ++++++
 6 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b16a0d7d5..239330183 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -51,7 +51,9 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
     apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
     apt install -y libjemalloc-dev && \
     apt install -y python3-pip pipx nginx unzip curl wget git vim less && \
-    apt install -y ghostscript
+    apt install -y ghostscript && \
+    apt install -y pandoc && \
+    apt install -y texlive
 
 RUN if [ "$NEED_MIRROR" == "1" ]; then \
         pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
diff --git a/agent/canvas.py b/agent/canvas.py
index bc7a45e3e..f262cd597 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -408,6 +408,10 @@ class Canvas(Graph):
                     else:
                         yield decorate("message", {"content": cpn_obj.output("content")})
                         cite = re.search(r"\[ID:[ 0-9]+\]",  cpn_obj.output("content"))
+
+                    if isinstance(cpn_obj.output("attachment"), tuple):
+                        yield decorate("message", {"attachment": cpn_obj.output("attachment")})
+                        
                     yield decorate("message_end", {"reference": self.get_reference() if cite else None})
 
                     while partials:
diff --git a/agent/component/message.py b/agent/component/message.py
index 641198083..555534610 100644
--- a/agent/component/message.py
+++ b/agent/component/message.py
@@ -17,6 +17,9 @@ import json
 import os
 import random
 import re
+import pypandoc
+import logging
+import tempfile
 from functools import partial
 from typing import Any
 
@@ -24,7 +27,8 @@ from agent.component.base import ComponentBase, ComponentParamBase
 from jinja2 import Template as Jinja2Template
 
 from common.connection_utils import timeout
-
+from common.misc_utils import get_uuid
+from common import settings
 
 class MessageParam(ComponentParamBase):
     """
@@ -34,6 +38,7 @@ class MessageParam(ComponentParamBase):
         super().__init__()
         self.content = []
         self.stream = True
+        self.output_format = None  # default output format
         self.outputs = {
             "content": {
                 "type": "str"
@@ -133,6 +138,7 @@ class Message(ComponentBase):
             yield rand_cnt[s: ]
 
         self.set_output("content", all_content)
+        self._convert_content(all_content)
 
     def _is_jinjia2(self, content:str) -> bool:
         patt = [
@@ -164,6 +170,68 @@ class Message(ComponentBase):
             content = re.sub(n, v, content)
 
         self.set_output("content", content)
+        self._convert_content(content)
 
     def thoughts(self) -> str:
         return ""
+
+    def _convert_content(self, content):
+        doc_id = get_uuid()
+        
+        if self._param.output_format.lower() not in {"markdown", "html", "pdf", "docx"}:
+            self._param.output_format = "markdown"
+
+        try:
+            if self._param.output_format in {"markdown", "html"}:
+                if isinstance(content, str):
+                    converted = pypandoc.convert_text(
+                        content,
+                        to=self._param.output_format,
+                        format="markdown",
+                    )
+                else:
+                    converted = pypandoc.convert_file(
+                        content,
+                        to=self._param.output_format,
+                        format="markdown",
+                    )
+
+                binary_content = converted.encode("utf-8")
+
+            else:  # pdf, docx
+                with tempfile.NamedTemporaryFile(suffix=f".{self._param.output_format}", delete=False) as tmp:
+                    tmp_name = tmp.name
+
+                try:
+                    if isinstance(content, str):
+                        pypandoc.convert_text(
+                            content,
+                            to=self._param.output_format,
+                            format="markdown",
+                            outputfile=tmp_name,
+                        )
+                    else:
+                        pypandoc.convert_file(
+                            content,
+                            to=self._param.output_format,
+                            format="markdown",
+                            outputfile=tmp_name,
+                        )
+
+                    with open(tmp_name, "rb") as f:
+                        binary_content = f.read()
+
+                finally:
+                    if os.path.exists(tmp_name):
+                        os.remove(tmp_name)
+
+            settings.STORAGE_IMPL.put(self._canvas._tenant_id, doc_id, binary_content)
+            self.set_output("attachment", {
+                "doc_id":doc_id, 
+                "format":self._param.output_format, 
+                "file_name":f"{doc_id[:8]}.{self._param.output_format}"})
+
+            logging.info(f"Converted content uploaded as {doc_id} (format={self._param.output_format})")
+
+        except Exception as e:
+            logging.error(f"Error converting content to {self._param.output_format}: {e}")
\ No newline at end of file
diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index 12c19f978..8cea336de 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -508,6 +508,7 @@ def get(doc_id):
         ext = ext.group(1) if ext else None
         if ext:
             if doc.type == FileType.VISUAL.value:
+
                 content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
             else:
                 content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
@@ -517,6 +518,22 @@ def get(doc_id):
         return server_error_response(e)
 
 
+@manager.route("/download/<attachment_id>", methods=["GET"])  # noqa: F821
+@login_required
+def download_attachment(attachment_id):
+    try:
+        ext = request.args.get("ext", "markdown")
+        data = settings.STORAGE_IMPL.get(current_user.id, attachment_id)
+        # NOTE(review): the converter stores blobs under the canvas tenant ID; confirm current_user.id matches it for team members.
+        response = flask.make_response(data)
+        response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
+
+        return response
+
+    except Exception as e:
+        return server_error_response(e)
+
+
 @manager.route("/change_parser", methods=["POST"])  # noqa: F821
 @login_required
 @validate_request("doc_id")
diff --git a/pyproject.toml b/pyproject.toml
index 2ec792b90..c1210dfb4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -145,6 +145,7 @@ dependencies = [
     "markdownify>=1.2.0",
     "captcha>=0.7.1",
     "pip>=25.2",
+    "pypandoc>=1.16",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index 166b34ce4..474ca510b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4892,6 +4892,14 @@ wheels = [
     { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/28/2659c02301b9500751f8d42f9a6632e1508aa5120de5e43042b8b30f8d5d/pyopenssl-25.1.0-py3-none-any.whl", hash = "sha256:2b11f239acc47ac2e5aca04fd7fa829800aeee22a2eb30d744572a157bd8a1ab", size = 56771, upload-time = "2025-05-17T16:28:29.197Z" },
 ]
 
+[[package]]
+name = "pypandoc"
+version = "1.16"
+source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
+wheels = [
+    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/77/af1fc54740a0712988f9518e629d38edc7b8ffccd7549203f19c3d8a2db6/pypandoc-1.16-py3-none-any.whl", hash = "sha256:868f390d48388743e7a5885915cbbaa005dea36a825ecdfd571f8c523416c822", size = 19425, upload-time = "2025-11-08T15:44:38.429Z" },
+]
+
 [[package]]
 name = "pyparsing"
 version = "3.2.3"
@@ -5292,6 +5300,7 @@ dependencies = [
     { name = "pyicu" },
     { name = "pymysql" },
     { name = "pyodbc" },
+    { name = "pypandoc" },
     { name = "pypdf" },
     { name = "pypdf2" },
     { name = "python-calamine" },
@@ -5447,6 +5456,7 @@ requires-dist = [
     { name = "pyicu", specifier = ">=2.15.3,<3.0.0" },
     { name = "pymysql", specifier = ">=1.1.1,<2.0.0" },
     { name = "pyodbc", specifier = ">=5.2.0,<6.0.0" },
+    { name = "pypandoc", specifier = ">=1.16" },
     { name = "pypdf", specifier = "==6.0.0" },
     { name = "pypdf2", specifier = ">=3.0.1,<4.0.0" },
     { name = "python-calamine", specifier = ">=0.4.0" },

From b1a1eedf5382512ec9ec737abb17174006209026 Mon Sep 17 00:00:00 2001
From: Billy Bao <newyorkupperbay@gmail.com>
Date: Fri, 14 Nov 2025 19:52:58 +0800
Subject: [PATCH 10/15] Doc: add default username & pwd (#11283)

### What problem does this PR solve?
Doc: add default username & pwd

### Type of change

- [x] Documentation Update

---------

Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
---
 docs/guides/accessing_admin_ui.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/guides/accessing_admin_ui.md b/docs/guides/accessing_admin_ui.md
index 52ff4d6c7..23521244b 100644
--- a/docs/guides/accessing_admin_ui.md
+++ b/docs/guides/accessing_admin_ui.md
@@ -12,6 +12,10 @@ The RAGFlow Admin UI is a web-based interface that provides comprehensive system
 
 To access the RAGFlow admin UI, append `/admin` to the web UI's address, e.g. `http://[RAGFLOW_WEB_UI_ADDR]/admin`, replace `[RAGFLOW_WEB_UI_ADDR]` with real RAGFlow web UI address.
 
+### Default Credentials
+| Username | Password |
+|----------|----------|
+| admin@ragflow.io   | admin |
 
 ## Admin UI Overview
 

From e841b09d631f2426067a2e45f25721e8d9ca9285 Mon Sep 17 00:00:00 2001
From: Jin Hai <haijin.chn@gmail.com>
Date: Fri, 14 Nov 2025 20:39:54 +0800
Subject: [PATCH 11/15] Remove unused code and fix performance issue (#11284)

### What problem does this PR solve?

1. remove redundant code
2. fix minor performance issue

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
---
 agent/canvas.py         |  2 --
 agent/component/base.py | 11 +++++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/agent/canvas.py b/agent/canvas.py
index f262cd597..e18cb8d26 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -298,8 +298,6 @@ class Canvas(Graph):
                     for kk, vv in kwargs["webhook_payload"].items():
                         self.components[k]["obj"].set_output(kk, vv)
 
-            self.components[k]["obj"].reset(True)
-
         for k in kwargs.keys():
             if k in ["query", "user_id", "files"] and kwargs[k]:
                 if k == "files":
diff --git a/agent/component/base.py b/agent/component/base.py
index 31ad46820..0864ccb9e 100644
--- a/agent/component/base.py
+++ b/agent/component/base.py
@@ -463,12 +463,15 @@ class ComponentBase(ABC):
         return self._param.outputs.get("_ERROR", {}).get("value")
 
     def reset(self, only_output=False):
-        for k in self._param.outputs.keys():
-            self._param.outputs[k]["value"] = None
+        outputs: dict = self._param.outputs # for better performance
+        for k in outputs.keys():
+            outputs[k]["value"] = None
         if only_output:
             return
-        for k in self._param.inputs.keys():
-            self._param.inputs[k]["value"] = None
+
+        inputs: dict = self._param.inputs # for better performance
+        for k in inputs.keys():
+            inputs[k]["value"] = None
         self._param.debug_inputs = {}
 
     def get_input(self, key: str=None) -> Union[Any, dict[str, Any]]:

From 61cf430dbb8507ee7d53cc5fe35a23ee8e271e55 Mon Sep 17 00:00:00 2001
From: Jin Hai <haijin.chn@gmail.com>
Date: Sun, 16 Nov 2025 19:29:20 +0800
Subject: [PATCH 12/15] Minor tweaks (#11271)

### What problem does this PR solve?

As title.

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
---
 api/db/db_models.py                      |  5 +++--
 api/db/services/connector_service.py     |  6 ++---
 api/db/services/dialog_service.py        |  8 ++++---
 api/db/services/document_service.py      | 28 ++++++++++++------------
 api/db/services/knowledgebase_service.py |  1 +
 api/utils/email_templates.py             | 16 ++++++++++++++
 api/utils/json_encode.py                 | 16 ++++++++++++++
 7 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/api/db/db_models.py b/api/db/db_models.py
index 68bf37ce4..2b4c4a0ef 100644
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@@ -305,6 +305,7 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
                     time.sleep(self.retry_delay * (2 ** attempt))
                 else:
                     raise
+        return None
 
 
 class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
@@ -772,7 +773,7 @@ class Document(DataBaseModel):
     thumbnail = TextField(null=True, help_text="thumbnail base64 string")
     kb_id = CharField(max_length=256, null=False, index=True)
     parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True)
-    pipeline_id = CharField(max_length=32, null=True, help_text="pipleline ID", index=True)
+    pipeline_id = CharField(max_length=32, null=True, help_text="pipeline ID", index=True)
     parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
     source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True)
     type = CharField(max_length=32, null=False, help_text="file extension", index=True)
@@ -876,7 +877,7 @@ class Dialog(DataBaseModel):
 class Conversation(DataBaseModel):
     id = CharField(max_length=32, primary_key=True)
     dialog_id = CharField(max_length=32, null=False, index=True)
-    name = CharField(max_length=255, null=True, help_text="converastion name", index=True)
+    name = CharField(max_length=255, null=True, help_text="conversation name", index=True)
     message = JSONField(null=True)
     reference = JSONField(null=True, default=[])
     user_id = CharField(max_length=255, null=True, help_text="user_id", index=True)
diff --git a/api/db/services/connector_service.py b/api/db/services/connector_service.py
index 3e65c87da..2f29c3324 100644
--- a/api/db/services/connector_service.py
+++ b/api/db/services/connector_service.py
@@ -70,7 +70,7 @@ class ConnectorService(CommonService):
     def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str):
         e, conn = cls.get_by_id(connector_id)
         if not e:
-            return
+            return None
         SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id])
         docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}", kb_id=kb_id)
         err = FileService.delete_docs([d.id for d in docs], tenant_id)
@@ -125,11 +125,11 @@ class SyncLogsService(CommonService):
             )
 
         query = query.distinct().order_by(cls.model.update_time.desc())
-        totbal = query.count()
+        total = query.count()
         if page_number:
             query = query.paginate(page_number, items_per_page)
 
-        return list(query.dicts()), totbal
+        return list(query.dicts()), total
 
     @classmethod
     def start(cls, id, connector_id):
diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index f54ebf709..d2f3b9bc1 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -342,7 +342,7 @@ def chat(dialog, messages, stream=True, **kwargs):
     if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"):
         for ans in chat_solo(dialog, messages, stream):
             yield ans
-        return
+        return None
 
     chat_start_ts = timer()
 
@@ -386,7 +386,7 @@ def chat(dialog, messages, stream=True, **kwargs):
         ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True), dialog.kb_ids)
         if ans:
             yield ans
-            return
+            return None
 
     for p in prompt_config["parameters"]:
         if p["key"] == "knowledge":
@@ -617,6 +617,8 @@ def chat(dialog, messages, stream=True, **kwargs):
         res["audio_binary"] = tts(tts_mdl, answer)
         yield res
 
+    return None
+
 
 def use_sql(question, field_map, tenant_id, chat_mdl, quota=True, kb_ids=None):
     sys_prompt = """
@@ -745,7 +747,7 @@ Please write the SQL, only SQL, without any other explanations or text.
 
 def tts(tts_mdl, text):
     if not tts_mdl or not text:
-        return
+        return None
     bin = b""
     for chunk in tts_mdl.tts(text):
         bin += chunk
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index 530133164..0abf1b1f3 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -113,7 +113,7 @@ class DocumentService(CommonService):
     def check_doc_health(cls, tenant_id: str, filename):
         import os
         MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
-        if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(tenant_id) >= MAX_FILE_NUM_PER_USER:
+        if 0 < MAX_FILE_NUM_PER_USER <= DocumentService.get_doc_count(tenant_id):
             raise RuntimeError("Exceed the maximum file number of a free user!")
         if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
             raise RuntimeError("Exceed the maximum length of file name!")
@@ -464,7 +464,7 @@ class DocumentService(CommonService):
             cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
         docs = docs.dicts()
         if not docs:
-            return
+            return None
         return docs[0]["tenant_id"]
 
     @classmethod
@@ -473,7 +473,7 @@ class DocumentService(CommonService):
         docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id)
         docs = docs.dicts()
         if not docs:
-            return
+            return None
         return docs[0]["kb_id"]
 
     @classmethod
@@ -486,7 +486,7 @@ class DocumentService(CommonService):
             cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)
         docs = docs.dicts()
         if not docs:
-            return
+            return None
         return docs[0]["tenant_id"]
 
     @classmethod
@@ -533,7 +533,7 @@ class DocumentService(CommonService):
             cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
         docs = docs.dicts()
         if not docs:
-            return
+            return None
         return docs[0]["embd_id"]
 
     @classmethod
@@ -569,7 +569,7 @@ class DocumentService(CommonService):
             .where(cls.model.name == doc_name)
         doc_id = doc_id.dicts()
         if not doc_id:
-            return
+            return None
         return doc_id[0]["id"]
 
     @classmethod
@@ -715,7 +715,7 @@ class DocumentService(CommonService):
                     prg = 1
                     status = TaskStatus.DONE.value
 
-                # only for special task and parsed docs and unfinised
+                # only for special task and parsed docs and unfinished
                 freeze_progress = special_task_running and doc_progress >= 1 and not finished
                 msg = "\n".join(sorted(msg))
                 info = {
@@ -974,13 +974,13 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
 
     def embedding(doc_id, cnts, batch_size=16):
         nonlocal embd_mdl, chunk_counts, token_counts
-        vects = []
+        vectors = []
         for i in range(0, len(cnts), batch_size):
             vts, c = embd_mdl.encode(cnts[i: i + batch_size])
-            vects.extend(vts.tolist())
+            vectors.extend(vts.tolist())
             chunk_counts[doc_id] += len(cnts[i:i + batch_size])
             token_counts[doc_id] += c
-        return vects
+        return vectors
 
     idxnm = search.index_name(kb.tenant_id)
     try_create_idx = True
@@ -1011,15 +1011,15 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
             except Exception:
                 logging.exception("Mind map generation error")
 
-        vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
-        assert len(cks) == len(vects)
+        vectors = embedding(doc_id, [c["content_with_weight"] for c in cks])
+        assert len(cks) == len(vectors)
         for i, d in enumerate(cks):
-            v = vects[i]
+            v = vectors[i]
             d["q_%d_vec" % len(v)] = v
         for b in range(0, len(cks), es_bulk_size):
             if try_create_idx:
                 if not settings.docStoreConn.indexExist(idxnm, kb_id):
-                    settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
+                    settings.docStoreConn.createIdx(idxnm, kb_id, len(vectors[0]))
                 try_create_idx = False
             settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
 
diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py
index 03179da49..ca30ca074 100644
--- a/api/db/services/knowledgebase_service.py
+++ b/api/db/services/knowledgebase_service.py
@@ -424,6 +424,7 @@ class KnowledgebaseService(CommonService):
 
         # Default parser_config (align with kb_app.create) — do not accept external overrides
         payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config"))
+
         return payload
 
 
diff --git a/api/utils/email_templates.py b/api/utils/email_templates.py
index 10473908a..34201ee38 100644
--- a/api/utils/email_templates.py
+++ b/api/utils/email_templates.py
@@ -1,3 +1,19 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
 """
 Reusable HTML email templates and registry.
 """
diff --git a/api/utils/json_encode.py b/api/utils/json_encode.py
index b21addd4f..fa5ea973a 100644
--- a/api/utils/json_encode.py
+++ b/api/utils/json_encode.py
@@ -1,3 +1,19 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
 import datetime
 import json
 from enum import Enum, IntEnum

From 13e212c8561dda5a8f2cc31df64eea5354abbc4f Mon Sep 17 00:00:00 2001
From: Yongteng Lei <yongtengrey@outlook.com>
Date: Mon, 17 Nov 2025 09:38:04 +0800
Subject: [PATCH 13/15] Feat: add Jira connector (#11285)

### What problem does this PR solve?

Add Jira connector.

<img width="978" height="925" alt="image"
src="https://github.com/user-attachments/assets/78bb5c77-2710-4569-a76e-9087ca23b227"
/>

---

<img width="1903" height="489" alt="image"
src="https://github.com/user-attachments/assets/193bc5c5-f751-4bd5-883a-2173282c2b96"
/>

---

<img width="1035" height="925" alt="image"
src="https://github.com/user-attachments/assets/1a0aec19-30eb-4ada-9283-61d1c915f59d"
/>

---

<img width="1905" height="601" alt="image"
src="https://github.com/user-attachments/assets/3dde1062-3f27-4717-8e09-fd5fd5e64171"
/>

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 common/data_source/__init__.py                |   2 +-
 common/data_source/config.py                  |  17 +
 common/data_source/confluence_connector.py    |   1 +
 common/data_source/jira/__init__.py           |   0
 common/data_source/jira/connector.py          | 973 ++++++++++++++++++
 common/data_source/jira/utils.py              | 149 +++
 common/data_source/jira_connector.py          | 112 --
 common/data_source/utils.py                   |  40 +-
 common/log_utils.py                           |   2 +-
 rag/svr/sync_data_source.py                   | 208 ++--
 web/src/assets/svg/data-source/jira.svg       |  16 +
 web/src/locales/en.ts                         |  27 +
 web/src/locales/zh.ts                         |  17 +
 .../user-setting/data-source/contant.tsx      | 130 ++-
 .../pages/user-setting/data-source/index.tsx  |   6 +
 15 files changed, 1521 insertions(+), 179 deletions(-)
 create mode 100644 common/data_source/jira/__init__.py
 create mode 100644 common/data_source/jira/connector.py
 create mode 100644 common/data_source/jira/utils.py
 delete mode 100644 common/data_source/jira_connector.py
 create mode 100644 web/src/assets/svg/data-source/jira.svg

diff --git a/common/data_source/__init__.py b/common/data_source/__init__.py
index 0802a5285..611c3c61a 100644
--- a/common/data_source/__init__.py
+++ b/common/data_source/__init__.py
@@ -11,7 +11,7 @@ from .confluence_connector import ConfluenceConnector
 from .discord_connector import DiscordConnector
 from .dropbox_connector import DropboxConnector
 from .google_drive.connector import GoogleDriveConnector
-from .jira_connector import JiraConnector
+from .jira.connector import JiraConnector
 from .sharepoint_connector import SharePointConnector
 from .teams_connector import TeamsConnector
 from .config import BlobType, DocumentSource
diff --git a/common/data_source/config.py b/common/data_source/config.py
index 02684dbac..e4040f85e 100644
--- a/common/data_source/config.py
+++ b/common/data_source/config.py
@@ -13,6 +13,7 @@ def get_current_tz_offset() -> int:
     return round(time_diff.total_seconds() / 3600)
 
 
+ONE_MINUTE = 60
 ONE_HOUR = 3600
 ONE_DAY = ONE_HOUR * 24
 
@@ -42,6 +43,7 @@ class DocumentSource(str, Enum):
     OCI_STORAGE = "oci_storage"
     SLACK = "slack"
     CONFLUENCE = "confluence"
+    JIRA = "jira"
     GOOGLE_DRIVE = "google_drive"
     GMAIL = "gmail"
     DISCORD = "discord"
@@ -178,6 +180,21 @@ GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD = int(
     os.environ.get("GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD", 10 * 1024 * 1024)
 )
 
+JIRA_CONNECTOR_LABELS_TO_SKIP = [
+    ignored_tag
+    for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
+    if ignored_tag
+]
+JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
+    os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
+)
+JIRA_SYNC_TIME_BUFFER_SECONDS = int(
+    os.environ.get("JIRA_SYNC_TIME_BUFFER_SECONDS", ONE_MINUTE)
+)
+JIRA_TIMEZONE_OFFSET = float(
+    os.environ.get("JIRA_TIMEZONE_OFFSET", get_current_tz_offset())
+)
+
 OAUTH_SLACK_CLIENT_ID = os.environ.get("OAUTH_SLACK_CLIENT_ID", "")
 OAUTH_SLACK_CLIENT_SECRET = os.environ.get("OAUTH_SLACK_CLIENT_SECRET", "")
 OAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get(
diff --git a/common/data_source/confluence_connector.py b/common/data_source/confluence_connector.py
index aed16ad2b..821f79862 100644
--- a/common/data_source/confluence_connector.py
+++ b/common/data_source/confluence_connector.py
@@ -1788,6 +1788,7 @@ class ConfluenceConnector(
         cql_url = self.confluence_client.build_cql_url(
             page_query, expand=",".join(_PAGE_EXPANSION_FIELDS)
         )
+        logging.info(f"[Confluence Connector] Building CQL URL {cql_url}")
         return update_param_in_path(cql_url, "limit", str(limit))
 
     @override
diff --git a/common/data_source/jira/__init__.py b/common/data_source/jira/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/common/data_source/jira/connector.py b/common/data_source/jira/connector.py
new file mode 100644
index 000000000..4635d72f3
--- /dev/null
+++ b/common/data_source/jira/connector.py
@@ -0,0 +1,973 @@
+"""Checkpointed Jira connector that emits markdown blobs for each issue."""
+
+from __future__ import annotations
+
+import argparse
+import copy
+import logging
+import os
+import re
+from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
+from datetime import datetime, timedelta, timezone
+from typing import Any
+from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
+from jira import JIRA
+from jira.resources import Issue
+from pydantic import Field
+
+from common.data_source.config import (
+    INDEX_BATCH_SIZE,
+    JIRA_CONNECTOR_LABELS_TO_SKIP,
+    JIRA_CONNECTOR_MAX_TICKET_SIZE,
+    JIRA_TIMEZONE_OFFSET,
+    ONE_HOUR,
+    DocumentSource,
+)
+from common.data_source.exceptions import (
+    ConnectorMissingCredentialError,
+    ConnectorValidationError,
+    InsufficientPermissionsError,
+    UnexpectedValidationError,
+)
+from common.data_source.interfaces import (
+    CheckpointedConnectorWithPermSync,
+    CheckpointOutputWrapper,
+    SecondsSinceUnixEpoch,
+    SlimConnectorWithPermSync,
+)
+from common.data_source.jira.utils import (
+    JIRA_CLOUD_API_VERSION,
+    JIRA_SERVER_API_VERSION,
+    build_issue_url,
+    extract_body_text,
+    extract_named_value,
+    extract_user,
+    format_attachments,
+    format_comments,
+    parse_jira_datetime,
+    should_skip_issue,
+)
+from common.data_source.models import (
+    ConnectorCheckpoint,
+    ConnectorFailure,
+    Document,
+    DocumentFailure,
+    SlimDocument,
+)
+from common.data_source.utils import is_atlassian_cloud_url, is_atlassian_date_error, scoped_url
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_FIELDS = "summary,description,updated,created,status,priority,assignee,reporter,labels,issuetype,project,comment,attachment"
+_SLIM_FIELDS = "key,project"
+_MAX_RESULTS_FETCH_IDS = 5000
+_JIRA_SLIM_PAGE_SIZE = 500
+_JIRA_FULL_PAGE_SIZE = 50
+_DEFAULT_ATTACHMENT_SIZE_LIMIT = 10 * 1024 * 1024  # 10MB
+
+
+class JiraCheckpoint(ConnectorCheckpoint):
+    """Checkpoint that tracks which slice of the current JQL result set was emitted."""
+
+    start_at: int = 0
+    cursor: str | None = None
+    ids_done: bool = False
+    all_issue_ids: list[list[str]] = Field(default_factory=list)
+
+
+_TZ_OFFSET_PATTERN = re.compile(r"([+-])(\d{2})(:?)(\d{2})$")
+
+
+class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
+    """Retrieve Jira issues and emit them as markdown documents."""
+
+    def __init__(
+        self,
+        jira_base_url: str,
+        project_key: str | None = None,
+        jql_query: str | None = None,
+        batch_size: int = INDEX_BATCH_SIZE,
+        include_comments: bool = True,
+        include_attachments: bool = False,
+        labels_to_skip: Sequence[str] | None = None,
+        comment_email_blacklist: Sequence[str] | None = None,
+        scoped_token: bool = False,
+        attachment_size_limit: int | None = None,
+        timezone_offset: float | None = None,
+    ) -> None:
+        if not jira_base_url:
+            raise ConnectorValidationError("Jira base URL must be provided.")
+
+        self.jira_base_url = jira_base_url.rstrip("/")
+        self.project_key = project_key
+        self.jql_query = jql_query
+        self.batch_size = batch_size
+        self.include_comments = include_comments
+        self.include_attachments = include_attachments
+        configured_labels = labels_to_skip or JIRA_CONNECTOR_LABELS_TO_SKIP
+        self.labels_to_skip = {label.lower() for label in configured_labels}
+        self.comment_email_blacklist = {email.lower() for email in comment_email_blacklist or []}
+        self.scoped_token = scoped_token
+        self.jira_client: JIRA | None = None
+
+        self.max_ticket_size = JIRA_CONNECTOR_MAX_TICKET_SIZE
+        self.attachment_size_limit = attachment_size_limit if attachment_size_limit and attachment_size_limit > 0 else _DEFAULT_ATTACHMENT_SIZE_LIMIT
+        self._fields_param = _DEFAULT_FIELDS
+        self._slim_fields = _SLIM_FIELDS
+
+        tz_offset_value = float(timezone_offset) if timezone_offset is not None else float(JIRA_TIMEZONE_OFFSET)
+        self.timezone_offset = tz_offset_value
+        self.timezone = timezone(offset=timedelta(hours=tz_offset_value))
+        self._timezone_overridden = timezone_offset is not None
+
+    # -------------------------------------------------------------------------
+    # Connector lifecycle helpers
+    # -------------------------------------------------------------------------
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        """Instantiate the Jira client using either an API token or username/password."""
+        jira_url_for_client = self.jira_base_url
+        if self.scoped_token:
+            if is_atlassian_cloud_url(self.jira_base_url):
+                try:
+                    jira_url_for_client = scoped_url(self.jira_base_url, "jira")
+                except ValueError as exc:
+                    raise ConnectorValidationError(str(exc)) from exc
+            else:
+                logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.")
+
+        user_email = credentials.get("jira_user_email") or credentials.get("username")
+        api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token")
+        password = credentials.get("jira_password") or credentials.get("password")
+        rest_api_version = credentials.get("rest_api_version")
+
+        if not rest_api_version:
+            rest_api_version = JIRA_CLOUD_API_VERSION if api_token else JIRA_SERVER_API_VERSION
+        options: dict[str, Any] = {"rest_api_version": rest_api_version}
+
+        try:
+            if user_email and api_token:
+                self.jira_client = JIRA(
+                    server=jira_url_for_client,
+                    basic_auth=(user_email, api_token),
+                    options=options,
+                )
+            elif api_token:
+                self.jira_client = JIRA(
+                    server=jira_url_for_client,
+                    token_auth=api_token,
+                    options=options,
+                )
+            elif user_email and password:
+                self.jira_client = JIRA(
+                    server=jira_url_for_client,
+                    basic_auth=(user_email, password),
+                    options=options,
+                )
+            else:
+                raise ConnectorMissingCredentialError("Jira credentials must include either an API token or username/password.")
+        except Exception as exc:  # pragma: no cover - jira lib raises many types
+            raise ConnectorMissingCredentialError(f"Jira: {exc}") from exc
+        self._sync_timezone_from_server()
+        return None
+
+    def validate_connector_settings(self) -> None:
+        """Validate connectivity by fetching basic Jira info."""
+        if not self.jira_client:
+            raise ConnectorMissingCredentialError("Jira")
+
+        try:
+            if self.jql_query:
+                dummy_checkpoint = self.build_dummy_checkpoint()
+                checkpoint_callback = self._make_checkpoint_callback(dummy_checkpoint)
+                iterator = self._perform_jql_search(
+                    jql=self.jql_query,
+                    start=0,
+                    max_results=1,
+                    fields="key",
+                    all_issue_ids=dummy_checkpoint.all_issue_ids,
+                    checkpoint_callback=checkpoint_callback,
+                    next_page_token=dummy_checkpoint.cursor,
+                    ids_done=dummy_checkpoint.ids_done,
+                )
+                next(iter(iterator), None)
+            elif self.project_key:
+                self.jira_client.project(self.project_key)
+            else:
+                self.jira_client.projects()
+        except Exception as exc:  # pragma: no cover - dependent on Jira responses
+            self._handle_validation_error(exc)
+
+    # -------------------------------------------------------------------------
+    # Checkpointed connector implementation
+    # -------------------------------------------------------------------------
+
+    def load_from_checkpoint(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: JiraCheckpoint,
+    ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
+        """Load Jira issues, emitting a Document per issue."""
+        try:
+            return (yield from self._load_with_retry(start, end, checkpoint))
+        except Exception as exc:
+            logger.exception(f"[Jira] Jira query ultimately failed: {exc}")
+            yield ConnectorFailure(
+                failure_message=f"Failed to query Jira: {exc}",
+                exception=exc,
+            )
+            return JiraCheckpoint(has_more=False, start_at=checkpoint.start_at)
+
+    def load_from_checkpoint_with_perm_sync(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: JiraCheckpoint,
+    ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
+        """Permission-sync variant; permissions are not synced separately, so this delegates unchanged to load_from_checkpoint."""
+        return (yield from self.load_from_checkpoint(start=start, end=end, checkpoint=checkpoint))
+
+    def _load_with_retry(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: JiraCheckpoint,
+    ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
+        if not self.jira_client:
+            raise ConnectorMissingCredentialError("Jira")
+        # Run the load; on an Atlassian date error, retry at most once with the JQL start moved back by ONE_HOUR.
+        attempt_start = start
+        retried_with_buffer = False
+        attempt = 0
+        # The loop exits either by returning the resulting checkpoint or by re-raising the error.
+        while True:
+            attempt += 1
+            jql = self._build_jql(attempt_start, end)
+            logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}")
+            try:
+                return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))  # filter always uses the original start
+            except Exception as exc:
+                if attempt_start is not None and not retried_with_buffer and is_atlassian_date_error(exc):
+                    attempt_start = attempt_start - ONE_HOUR
+                    retried_with_buffer = True
+                    logger.info(f"[Jira] Atlassian date error detected; retrying with start={attempt_start}.")
+                    continue
+                raise
+
+    def _handle_validation_error(self, exc: Exception) -> None:  # Always raises: translates a validation-time exception into a connector error.
+        status_code = getattr(exc, "status_code", None)
+        if status_code == 401:
+            raise InsufficientPermissionsError("Jira credential appears to be invalid or expired (HTTP 401).") from exc
+        if status_code == 403:
+            raise InsufficientPermissionsError("Jira token does not have permission to access the requested resources (HTTP 403).") from exc
+        if status_code == 404:
+            raise ConnectorValidationError("Jira resource not found (HTTP 404).") from exc
+        if status_code == 429:
+            raise ConnectorValidationError("Jira rate limit exceeded during validation (HTTP 429).") from exc
+        # No recognised status code: use the exception's `text` attribute if it has one, else str(exc).
+        message = getattr(exc, "text", str(exc))
+        if not message:
+            raise UnexpectedValidationError("Unexpected Jira validation error.") from exc
+        # Any other non-empty message is reported as a generic validation failure.
+        raise ConnectorValidationError(f"Jira validation failed: {message}") from exc
+
+    def _load_from_checkpoint_internal(
+        self,
+        jql: str,
+        checkpoint: JiraCheckpoint,
+        start_filter: SecondsSinceUnixEpoch | None = None,
+    ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]:
+        assert self.jira_client, "load_credentials must be called before loading issues."
+        # Run one search call for `jql`, yield a Document (or ConnectorFailure) per issue, and return an updated checkpoint copy.
+        page_size = self._full_page_size()
+        new_checkpoint = copy.deepcopy(checkpoint)  # work on a copy; the caller's checkpoint is left untouched
+        starting_offset = new_checkpoint.start_at or 0
+        current_offset = starting_offset
+        checkpoint_callback = self._make_checkpoint_callback(new_checkpoint)
+
+        issue_iter = self._perform_jql_search(
+            jql=jql,
+            start=current_offset,
+            max_results=page_size,
+            fields=self._fields_param,
+            all_issue_ids=new_checkpoint.all_issue_ids,
+            checkpoint_callback=checkpoint_callback,
+            next_page_token=new_checkpoint.cursor,
+            ids_done=new_checkpoint.ids_done,
+        )
+
+        start_cutoff = float(start_filter) if start_filter is not None else None
+
+        for issue in issue_iter:
+            current_offset += 1  # counts every issue returned, including ones skipped below
+            issue_key = getattr(issue, "key", "unknown")
+            if should_skip_issue(issue, self.labels_to_skip):
+                continue
+
+            issue_updated = parse_jira_datetime(issue.raw.get("fields", {}).get("updated"))
+            if start_cutoff is not None and issue_updated is not None and issue_updated.timestamp() <= start_cutoff:
+                # Jira JQL only supports minute precision, so we discard already-processed
+                # issues here based on the original second-level cutoff.
+                continue
+
+            try:
+                document = self._issue_to_document(issue)
+            except Exception as exc:  # pragma: no cover - defensive
+                logger.exception(f"[Jira] Failed to convert Jira issue {issue_key}: {exc}")
+                yield ConnectorFailure(
+                    failure_message=f"Failed to convert Jira issue {issue_key}: {exc}",
+                    failed_document=DocumentFailure(
+                        document_id=issue_key,
+                        document_link=build_issue_url(self.jira_base_url, issue_key),
+                    ),
+                    exception=exc,
+                )
+                continue
+            # _issue_to_document returns None for tickets over max_ticket_size; their attachments are skipped as well.
+            if document is not None:
+                yield document
+                if self.include_attachments:
+                    for attachment_document in self._attachment_documents(issue):
+                        if attachment_document is not None:
+                            yield attachment_document
+        # Recompute has_more for the next run, then record the offset reached.
+        self._update_checkpoint_for_next_run(
+            checkpoint=new_checkpoint,
+            current_offset=current_offset,
+            starting_offset=starting_offset,
+            page_size=page_size,
+        )
+        new_checkpoint.start_at = current_offset
+        return new_checkpoint
+
+    def build_dummy_checkpoint(self) -> JiraCheckpoint:
+        """Return a fresh checkpoint (has_more=True, start_at=0) used to kick off ingestion from the beginning."""
+        return JiraCheckpoint(has_more=True, start_at=0)
+
+    def validate_checkpoint_json(self, checkpoint_json: str) -> JiraCheckpoint:
+        """Parse a serialized (JSON string) checkpoint into a JiraCheckpoint via model_validate_json."""
+        return JiraCheckpoint.model_validate_json(checkpoint_json)
+
+    # -------------------------------------------------------------------------
+    # Slim connector implementation
+    # -------------------------------------------------------------------------
+
+    def retrieve_all_slim_docs_perm_sync(
+        self,
+        start: SecondsSinceUnixEpoch | None = None,
+        end: SecondsSinceUnixEpoch | None = None,
+        callback: Any = None,  # noqa: ARG002 - maintained for interface compatibility
+    ) -> Generator[list[SlimDocument], None, None]:
+        """Yield batches of SlimDocument (id = issue browse URL) for issues updated in [start, end]; used for permission syncing."""
+        if not self.jira_client:
+            raise ConnectorMissingCredentialError("Jira")
+        # Missing bounds default to epoch 0 and the current UTC time.
+        start_ts = start if start is not None else 0
+        end_ts = end if end is not None else datetime.now(timezone.utc).timestamp()
+        jql = self._build_jql(start_ts, end_ts)
+
+        checkpoint = self.build_dummy_checkpoint()
+        checkpoint_callback = self._make_checkpoint_callback(checkpoint)
+        prev_offset = 0
+        current_offset = 0
+        slim_batch: list[SlimDocument] = []
+        # Each pass of the while loop runs one search call; has_more is recomputed after it.
+        while checkpoint.has_more:
+            for issue in self._perform_jql_search(
+                jql=jql,
+                start=current_offset,
+                max_results=_JIRA_SLIM_PAGE_SIZE,
+                fields=self._slim_fields,
+                all_issue_ids=checkpoint.all_issue_ids,
+                checkpoint_callback=checkpoint_callback,
+                next_page_token=checkpoint.cursor,
+                ids_done=checkpoint.ids_done,
+            ):
+                current_offset += 1  # counts skipped issues too
+                if should_skip_issue(issue, self.labels_to_skip):
+                    continue
+
+                doc_id = build_issue_url(self.jira_base_url, issue.key)
+                slim_batch.append(SlimDocument(id=doc_id))
+
+                if len(slim_batch) >= _JIRA_SLIM_PAGE_SIZE:
+                    yield slim_batch
+                    slim_batch = []
+
+            self._update_checkpoint_for_next_run(
+                checkpoint=checkpoint,
+                current_offset=current_offset,
+                starting_offset=prev_offset,
+                page_size=_JIRA_SLIM_PAGE_SIZE,
+            )
+            prev_offset = current_offset
+        # Flush the final, partially filled batch.
+        if slim_batch:
+            yield slim_batch
+
+    # -------------------------------------------------------------------------
+    # Internal helpers
+    # -------------------------------------------------------------------------
+
+    def _build_jql(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> str:
+        """Build the JQL: base scope AND label exclusion AND updated-window; appends ORDER BY updated ASC unless an ORDER BY is present."""
+        clauses: list[str] = []
+        if self.jql_query:
+            clauses.append(f"({self.jql_query})")
+        elif self.project_key:
+            clauses.append(f'project = "{self.project_key}"')
+        else:
+            raise ConnectorValidationError("Either project_key or jql_query must be provided for Jira connector.")
+
+        if self.labels_to_skip:
+            labels = ", ".join(f'"{label}"' for label in self.labels_to_skip)
+            # `labels NOT IN (...)` on its own never matches issues that have no labels at all,
+            # so unlabelled issues must be let through explicitly or they are silently dropped.
+            clauses.append(f"(labels IS EMPTY OR labels NOT IN ({labels}))")
+
+        if start is not None:
+            clauses.append(f'updated >= "{self._format_jql_time(start)}"')
+        if end is not None:
+            clauses.append(f'updated <= "{self._format_jql_time(end)}"')
+
+        jql = " AND ".join(clauses)
+        if "order by" not in jql.lower():
+            jql = f"{jql} ORDER BY updated ASC"
+        return jql
+
+    def _format_jql_time(self, timestamp: SecondsSinceUnixEpoch) -> str:
+        # JQL date literals carry minute precision only; a second-level filter applied
+        # after the query is what prevents duplicates.
+        seconds = float(timestamp)
+        localized = datetime.fromtimestamp(seconds, tz=timezone.utc).astimezone(self.timezone)
+        return localized.strftime("%Y-%m-%d %H:%M")
+
+    def _issue_to_document(self, issue: Issue) -> Document | None:  # Render one issue as a ".md" Document; None when the rendered text exceeds max_ticket_size.
+        fields = issue.raw.get("fields", {})
+        summary = fields.get("summary") or ""
+        description_text = extract_body_text(fields.get("description"))
+        comments_text = (
+            format_comments(
+                fields.get("comment"),
+                blacklist=self.comment_email_blacklist,
+            )
+            if self.include_comments
+            else ""
+        )
+        attachments_text = format_attachments(fields.get("attachment"))
+
+        reporter_name, reporter_email = extract_user(fields.get("reporter"))
+        assignee_name, assignee_email = extract_user(fields.get("assignee"))
+        status = extract_named_value(fields.get("status"))
+        priority = extract_named_value(fields.get("priority"))
+        issue_type = extract_named_value(fields.get("issuetype"))
+        project = fields.get("project") or {}
+
+        issue_url = build_issue_url(self.jira_base_url, issue.key)
+        # Metadata header lines; the people and label lines below are appended only when present.
+        metadata_lines = [
+            f"key: {issue.key}",
+            f"url: {issue_url}",
+            f"summary: {summary}",
+            f"status: {status or 'Unknown'}",
+            f"priority: {priority or 'Unspecified'}",
+            f"issue_type: {issue_type or 'Unknown'}",
+            f"project: {project.get('name') or ''}",
+            f"project_key: {project.get('key') or self.project_key or ''}",
+        ]
+
+        if reporter_name:
+            metadata_lines.append(f"reporter: {reporter_name}")
+        if reporter_email:
+            metadata_lines.append(f"reporter_email: {reporter_email}")
+        if assignee_name:
+            metadata_lines.append(f"assignee: {assignee_name}")
+        if assignee_email:
+            metadata_lines.append(f"assignee_email: {assignee_email}")
+        if fields.get("labels"):
+            metadata_lines.append(f"labels: {', '.join(fields.get('labels'))}")
+        # `updated` falls back to `created`, then to the current UTC time, so updated_dt is never None below.
+        created_dt = parse_jira_datetime(fields.get("created"))
+        updated_dt = parse_jira_datetime(fields.get("updated")) or created_dt or datetime.now(timezone.utc)
+        metadata_lines.append(f"created: {created_dt.isoformat() if created_dt else ''}")
+        metadata_lines.append(f"updated: {updated_dt.isoformat() if updated_dt else ''}")
+        # Body layout: "---"-delimited metadata header, then Description and optional Comments / Attachments sections.
+        sections: list[str] = [
+            "---",
+            "\n".join(filter(None, metadata_lines)),
+            "---",
+            "",
+            "## Description",
+            description_text or "No description provided.",
+        ]
+
+        if comments_text:
+            sections.extend(["", "## Comments", comments_text])
+        if attachments_text:
+            sections.extend(["", "## Attachments", attachments_text])
+
+        blob_text = "\n".join(sections).strip() + "\n"
+        blob = blob_text.encode("utf-8")
+        # The size limit is applied to the UTF-8 encoded bytes, not to the character count.
+        if len(blob) > self.max_ticket_size:
+            logger.info(f"[Jira] Skipping {issue.key} because it exceeds the maximum size of {self.max_ticket_size} bytes.")
+            return None
+
+        semantic_identifier = f"{issue.key}: {summary}" if summary else issue.key
+
+        return Document(
+            id=issue_url,
+            source=DocumentSource.JIRA,
+            semantic_identifier=semantic_identifier,
+            extension=".md",
+            blob=blob,
+            doc_updated_at=updated_dt,
+            size_bytes=len(blob),
+        )
+
+    def _attachment_documents(self, issue: Issue) -> Iterable[Document]:  # Generator: one Document per attachment that converts successfully.
+        attachments = issue.raw.get("fields", {}).get("attachment") or []
+        for attachment in attachments:
+            try:
+                document = self._attachment_to_document(issue, attachment)
+                if document is not None:
+                    yield document
+            except Exception as exc:  # pragma: no cover - defensive
+                failed_id = attachment.get("id") or attachment.get("filename")
+                issue_key = getattr(issue, "key", "unknown")
+                logger.warning(f"[Jira] Failed to process attachment {failed_id} for issue {issue_key}: {exc}")  # best effort: log, then continue with the next attachment
+
+    def _attachment_to_document(self, issue: Issue, attachment: dict[str, Any]) -> Document | None:  # Download one attachment into a Document; None when disabled, incomplete, or over the size limit.
+        if not self.include_attachments:
+            return None
+
+        filename = attachment.get("filename")
+        content_url = attachment.get("content")
+        if not filename or not content_url:
+            return None
+        # First size check uses the size reported in the attachment metadata, before any download; an unparsable size counts as unknown (0).
+        try:
+            attachment_size = int(attachment.get("size", 0))
+        except (TypeError, ValueError):
+            attachment_size = 0
+        if attachment_size and attachment_size > self.attachment_size_limit:
+            logger.info(f"[Jira] Skipping attachment {filename} on {issue.key} because reported size exceeds limit ({self.attachment_size_limit} bytes).")
+            return None
+
+        blob = self._download_attachment(content_url)
+        if blob is None:
+            return None
+        # Second size check uses the number of bytes actually downloaded.
+        if len(blob) > self.attachment_size_limit:
+            logger.info(f"[Jira] Skipping attachment {filename} on {issue.key} because it exceeds the size limit ({self.attachment_size_limit} bytes).")
+            return None
+        # Timestamp preference: attachment created, attachment updated, issue updated, then the current UTC time.
+        attachment_time = parse_jira_datetime(attachment.get("created")) or parse_jira_datetime(attachment.get("updated"))
+        updated_dt = attachment_time or parse_jira_datetime(issue.raw.get("fields", {}).get("updated")) or datetime.now(timezone.utc)
+
+        extension = os.path.splitext(filename)[1] or ""
+        document_id = f"{issue.key}::attachment::{attachment.get('id') or filename}"
+        semantic_identifier = f"{issue.key} attachment: {filename}"
+
+        return Document(
+            id=document_id,
+            source=DocumentSource.JIRA,
+            semantic_identifier=semantic_identifier,
+            extension=extension,
+            blob=blob,
+            doc_updated_at=updated_dt,
+            size_bytes=len(blob),
+        )
+
+    def _download_attachment(self, url: str) -> bytes | None:  # GET `url` through the Jira client's HTTP session and return the body bytes.
+        if not self.jira_client:
+            raise ConnectorMissingCredentialError("Jira")
+        response = self.jira_client._session.get(url)  # NOTE(review): no explicit timeout is passed here — confirm the session configures one
+        response.raise_for_status()
+        return response.content  # the whole body is held in memory; callers check its length afterwards
+
+    def _sync_timezone_from_server(self) -> None:  # Adopt the UTC offset reported by the Jira server, unless the timezone was explicitly overridden.
+        if self._timezone_overridden or not self.jira_client:
+            return
+        try:
+            server_info = self.jira_client.server_info()
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.info(f"[Jira] Unable to determine timezone from server info; continuing with offset {self.timezone_offset}. Error: {exc}")
+            return
+        # Nothing to do when no offset could be derived or it already matches the current one.
+        detected_offset = self._extract_timezone_offset(server_info)
+        if detected_offset is None or detected_offset == self.timezone_offset:
+            return
+        # Keep the numeric offset (hours) and the tzinfo object in step.
+        self.timezone_offset = detected_offset
+        self.timezone = timezone(offset=timedelta(hours=detected_offset))
+        logger.info(f"[Jira] Timezone offset adjusted to {detected_offset} hours using Jira server info.")
+
+    def _extract_timezone_offset(self, server_info: dict[str, Any]) -> float | None:
+        # Try the offset embedded in `serverTime` first, then the named `timeZone`;
+        # the first source that is a string and resolves to an offset wins.
+        sources = (("serverTime", self._parse_offset_from_datetime_string), ("timeZone", self._offset_from_zone_name))
+        for field_name, resolve in sources:
+            raw = server_info.get(field_name)
+            if not isinstance(raw, str):
+                continue
+            hours = resolve(raw)
+            if hours is not None:
+                return hours
+
+        return None
+
+    @staticmethod
+    def _parse_offset_from_datetime_string(value: str) -> float | None:  # UTC offset in hours carried by an ISO-format timestamp; None if unparsable.
+        normalized = JiraConnector._normalize_datetime_string(value)
+        try:
+            dt = datetime.fromisoformat(normalized)
+        except ValueError:
+            return None
+        # NOTE(review): a timestamp with no offset is reported as 0.0, which also stops _extract_timezone_offset from trying the timeZone name — confirm intended.
+        if dt.tzinfo is None:
+            return 0.0
+
+        offset = dt.tzinfo.utcoffset(dt)
+        if offset is None:
+            return None
+        return offset.total_seconds() / 3600.0
+
+    @staticmethod
+    def _normalize_datetime_string(value: str) -> str:  # Trim the string and rewrite its UTC-offset suffix into "+HH:MM" form.
+        trimmed = (value or "").strip()
+        if trimmed.endswith("Z"):
+            return f"{trimmed[:-1]}+00:00"  # trailing "Z" becomes an explicit zero offset
+        # Presumably group 3 of _TZ_OFFSET_PATTERN is the optional colon (pattern is defined elsewhere — verify): a colon-less offset is re-emitted with one.
+        match = _TZ_OFFSET_PATTERN.search(trimmed)
+        if match and match.group(3) != ":":
+            sign, hours, _, minutes = match.groups()
+            trimmed = f"{trimmed[: match.start()]}{sign}{hours}:{minutes}"
+        return trimmed
+
+    @staticmethod
+    def _offset_from_zone_name(name: str) -> float | None:
+        # Current UTC offset, in hours, of the named zone; None when ZoneInfo rejects the name.
+        try:
+            zone = ZoneInfo(name)
+        except (ZoneInfoNotFoundError, ValueError):
+            return None
+        delta = datetime.now(zone).utcoffset()
+        if delta is None:
+            return None
+        return delta.total_seconds() / 3600.0
+
+    def _is_cloud_client(self) -> bool:
+        client = self.jira_client
+        if not client:
+            return False
+        return str(client._options.get("rest_api_version", "")).strip() == str(JIRA_CLOUD_API_VERSION)  # Cloud is recognised by the configured REST API version alone
+
+    def _full_page_size(self) -> int:  # batch_size capped at _JIRA_FULL_PAGE_SIZE, and never less than 1
+        return max(1, min(self.batch_size, _JIRA_FULL_PAGE_SIZE))
+
+    def _perform_jql_search(
+        self,
+        *,
+        jql: str,
+        start: int,
+        max_results: int,
+        fields: str | None = None,
+        all_issue_ids: list[list[str]] | None = None,
+        checkpoint_callback: Callable[[Iterable[list[str]], str | None], None] | None = None,
+        next_page_token: str | None = None,
+        ids_done: bool = False,
+    ) -> Iterable[Issue]:  # Generator that dispatches to the v3 (Cloud) or v2 search path based on _is_cloud_client().
+        assert self.jira_client, "Jira client not initialized."
+        is_cloud = self._is_cloud_client()
+        if is_cloud:
+            if all_issue_ids is None:
+                raise ValueError("all_issue_ids is required for Jira Cloud searches.")
+            yield from self._perform_jql_search_v3(  # `start` is not used on this path; paging is driven by next_page_token and all_issue_ids
+                jql=jql,
+                max_results=max_results,
+                fields=fields,
+                all_issue_ids=all_issue_ids,
+                checkpoint_callback=checkpoint_callback,
+                next_page_token=next_page_token,
+                ids_done=ids_done,
+            )
+        else:
+            yield from self._perform_jql_search_v2(  # offset-based paging; the ID/cursor arguments are ignored
+                jql=jql,
+                start=start,
+                max_results=max_results,
+                fields=fields,
+            )
+
+    def _perform_jql_search_v3(
+        self,
+        *,
+        jql: str,
+        max_results: int,
+        all_issue_ids: list[list[str]],
+        fields: str | None = None,
+        checkpoint_callback: Callable[[Iterable[list[str]], str | None], None] | None = None,
+        next_page_token: str | None = None,
+        ids_done: bool = False,
+    ) -> Iterable[Issue]:
+        assert self.jira_client, "Jira client not initialized."
+        # Step 1: while ID paging is unfinished, fetch the next page of IDs and pass them (chunked by max_results) plus the new token to the callback.
+        if not ids_done:
+            new_ids, page_token = self._enhanced_search_ids(jql, next_page_token)
+            if checkpoint_callback is not None and new_ids:
+                checkpoint_callback(
+                    self._chunk_issue_ids(new_ids, max_results),
+                    page_token,
+                )
+            elif checkpoint_callback is not None:
+                checkpoint_callback([], page_token)  # still report the token even when the page held no IDs
+        # Step 2: pop the last queued ID batch and bulk-fetch it. NOTE(review): relies on the callback appending to this same all_issue_ids list — confirm.
+        if all_issue_ids:
+            issue_ids = all_issue_ids.pop()
+            if issue_ids:
+                yield from self._bulk_fetch_issues(issue_ids, fields)
+
+    def _perform_jql_search_v2(
+        self,
+        *,
+        jql: str,
+        start: int,
+        max_results: int,
+        fields: str | None = None,
+    ) -> Iterable[Issue]:
+        assert self.jira_client, "Jira client not initialized."
+        # Offset-paged search through the client's search_issues; yields each returned issue.
+        issues = self.jira_client.search_issues(
+            jql_str=jql,
+            startAt=start,
+            maxResults=max_results,
+            fields=fields or self._fields_param,  # falls back to the connector's default field list
+            expand="renderedFields",
+        )
+        for issue in issues:
+            yield issue
+
+    def _enhanced_search_ids(
+        self,
+        jql: str,
+        next_page_token: str | None,
+    ) -> tuple[list[str], str | None]:  # Returns (issue ids as strings, nextPageToken from the response or None).
+        assert self.jira_client, "Jira client not initialized."
+        enhanced_search_path = self.jira_client._get_url("search/jql")
+        params: dict[str, str | int | None] = {
+            "jql": jql,
+            "maxResults": _MAX_RESULTS_FETCH_IDS,
+            "nextPageToken": next_page_token,
+            "fields": "id",  # only IDs are requested here; issue content is fetched separately
+        }
+        response = self.jira_client._session.get(enhanced_search_path, params=params)
+        response.raise_for_status()
+        data = response.json()
+        return [str(issue["id"]) for issue in data.get("issues", [])], data.get("nextPageToken")
+
+    def _bulk_fetch_issues(
+        self,
+        issue_ids: list[str],
+        fields: str | None,
+    ) -> Iterable[Issue]:  # POSTs the ids to issue/bulkfetch and wraps each returned issue in an Issue object.
+        assert self.jira_client, "Jira client not initialized."
+        if not issue_ids:
+            return []
+        # `fields` arrives as a comma-separated string; the request body takes a list, defaulting to all fields.
+        bulk_fetch_path = self.jira_client._get_url("issue/bulkfetch")
+        payload: dict[str, Any] = {"issueIdsOrKeys": issue_ids}
+        payload["fields"] = fields.split(",") if fields else ["*all"]
+
+        response = self.jira_client._session.post(bulk_fetch_path, json=payload)
+        response.raise_for_status()
+        data = response.json()
+        return [Issue(self.jira_client._options, self.jira_client._session, raw=issue) for issue in data.get("issues", [])]
+
+    @staticmethod
+    def _chunk_issue_ids(issue_ids: list[str], chunk_size: int) -> Iterable[list[str]]:
+        width = chunk_size if chunk_size > 0 else _JIRA_FULL_PAGE_SIZE
+        # Generator: consecutive slices of at most `width` ids, in order.
+        starts = range(0, len(issue_ids), width)
+        for begin in starts:
+            yield issue_ids[begin : begin + width]
+
+    def _make_checkpoint_callback(self, checkpoint: JiraCheckpoint) -> Callable[[Iterable[list[str]], str | None], None]:
+        def checkpoint_callback(
+            issue_ids: Iterable[list[str]] | list[list[str]],
+            page_token: str | None,
+        ) -> None:
+            # Queue each batch as its own list, then record where ID paging stands.
+            checkpoint.all_issue_ids.extend(list(batch) for batch in issue_ids)
+            checkpoint.cursor = page_token
+            checkpoint.ids_done = page_token is None
+
+        return checkpoint_callback
+
+    def _update_checkpoint_for_next_run(
+        self,
+        *,
+        checkpoint: JiraCheckpoint,
+        current_offset: int,
+        starting_offset: int,
+        page_size: int,
+    ) -> None:  # Cloud: more work while ID batches or ID pages remain. Otherwise: more work only after a full page, and cursor state is reset.
+        if self._is_cloud_client():
+            checkpoint.has_more = not checkpoint.ids_done or bool(checkpoint.all_issue_ids)
+            return
+        checkpoint.has_more = (current_offset - starting_offset) == page_size
+        checkpoint.cursor = None
+        checkpoint.ids_done = True
+        checkpoint.all_issue_ids = []
+
+
+def iterate_jira_documents(
+    connector: "JiraConnector",
+    start: SecondsSinceUnixEpoch,
+    end: SecondsSinceUnixEpoch,
+    iteration_limit: int = 100_000,
+) -> Iterator[Document]:
+    """Yield documents lazily, checkpoint by checkpoint; raises RuntimeError on the first failure or past iteration_limit."""
+    # Start from an empty checkpoint and keep loading until the connector reports has_more=False.
+    checkpoint = connector.build_dummy_checkpoint()
+    iterations = 0
+
+    while checkpoint.has_more:
+        wrapper = CheckpointOutputWrapper[JiraCheckpoint]()
+        generator = wrapper(connector.load_from_checkpoint(start=start, end=end, checkpoint=checkpoint))
+        # The wrapper yields (document, failure, next_checkpoint) triples; the fields that are not None are handled below.
+        for document, failure, next_checkpoint in generator:
+            if failure is not None:
+                failure_message = getattr(failure, "failure_message", str(failure))
+                raise RuntimeError(f"Failed to load Jira documents: {failure_message}")
+            if document is not None:
+                yield document
+            if next_checkpoint is not None:
+                checkpoint = next_checkpoint
+        # Guard against a checkpoint that never reaches has_more=False.
+        iterations += 1
+        if iterations > iteration_limit:
+            raise RuntimeError("Too many iterations while loading Jira documents.")
+
+
+def test_jira(
+    *,
+    base_url: str,
+    project_key: str | None = None,
+    jql_query: str | None = None,
+    credentials: dict[str, Any],
+    batch_size: int = INDEX_BATCH_SIZE,
+    start_ts: float | None = None,
+    end_ts: float | None = None,
+    connector_options: dict[str, Any] | None = None,
+) -> list[Document]:
+    """Build a JiraConnector, load credentials, validate settings, then return every document in [start_ts, end_ts] as a list."""
+    # connector_options is copied and forwarded as extra keyword arguments to JiraConnector.
+    connector_kwargs = connector_options.copy() if connector_options else {}
+    connector = JiraConnector(
+        jira_base_url=base_url,
+        project_key=project_key,
+        jql_query=jql_query,
+        batch_size=batch_size,
+        **connector_kwargs,
+    )
+    connector.load_credentials(credentials)
+    connector.validate_connector_settings()
+    # Missing bounds default to 0.0 and the current UTC time.
+    now_ts = datetime.now(timezone.utc).timestamp()
+    start = start_ts if start_ts is not None else 0.0
+    end = end_ts if end_ts is not None else now_ts
+
+    documents = list(iterate_jira_documents(connector, start=start, end=end))  # materializes the full result set
+    logger.info(f"[Jira] Fetched {len(documents)} Jira documents.")
+    for doc in documents[:5]:
+        logger.info(f"[Jira] Document {doc.semantic_identifier} ({doc.id}) size={doc.size_bytes} bytes")
+    return documents
+
+
+def _build_arg_parser() -> argparse.ArgumentParser:  # CLI options; most defaults come from JIRA_* environment variables.
+    parser = argparse.ArgumentParser(description="Fetch Jira issues and print summary statistics.")
+    parser.add_argument("--base-url", dest="base_url", default=os.environ.get("JIRA_BASE_URL"))
+    parser.add_argument("--project", dest="project_key", default=os.environ.get("JIRA_PROJECT_KEY"))
+    parser.add_argument("--jql", dest="jql_query", default=os.environ.get("JIRA_JQL"))
+    parser.add_argument("--email", dest="user_email", default=os.environ.get("JIRA_USER_EMAIL"))
+    parser.add_argument("--token", dest="api_token", default=os.environ.get("JIRA_API_TOKEN"))
+    parser.add_argument("--password", dest="password", default=os.environ.get("JIRA_PASSWORD"))
+    parser.add_argument("--batch-size", dest="batch_size", type=int, default=int(os.environ.get("JIRA_BATCH_SIZE", INDEX_BATCH_SIZE)))
+    parser.add_argument("--include_comments", dest="include_comments", type=lambda v: v.strip().lower() not in {"", "0", "false", "no", "off"}, default=True)  # type=bool would parse "false" as True
+    parser.add_argument("--include_attachments", dest="include_attachments", type=lambda v: v.strip().lower() not in {"", "0", "false", "no", "off"}, default=True)  # same parsing as --include_comments
+    parser.add_argument("--attachment_size_limit", dest="attachment_size_limit", type=float, default=_DEFAULT_ATTACHMENT_SIZE_LIMIT)
+    parser.add_argument("--start-ts", dest="start_ts", type=float, default=None, help="Epoch seconds inclusive lower bound for updated issues.")
+    parser.add_argument("--end-ts", dest="end_ts", type=float, default=9999999999, help="Epoch seconds inclusive upper bound for updated issues.")
+    return parser
+
+
+def main(config: dict[str, Any] | None = None) -> None:  # Manual test entry point: fetch documents per `config` (or CLI arguments) and print a short summary.
+    if config is None:
+        args = _build_arg_parser().parse_args()
+        config = {
+            "base_url": args.base_url,
+            "project_key": args.project_key,
+            "jql_query": args.jql_query,
+            "batch_size": args.batch_size,
+            "start_ts": args.start_ts,
+            "end_ts": args.end_ts,
+            "include_comments": args.include_comments,
+            "include_attachments": args.include_attachments,
+            "attachment_size_limit": args.attachment_size_limit,
+            "credentials": {
+                "jira_user_email": args.user_email,
+                "jira_api_token": args.api_token,
+                "jira_password": args.password,
+            },
+        }
+    # Everything below works from `config`, whether it was passed in or built from CLI arguments.
+    base_url = config.get("base_url")
+    credentials = config.get("credentials", {})
+    # Never echo secrets: mask every supplied credential value so tokens/passwords do not land in logs or terminals.
+    redacted = {name: ("***" if secret else secret) for name, secret in credentials.items()}
+    print(f"[Jira] config={ {**config, 'credentials': redacted} }", flush=True)
+
+    if not base_url:
+        raise RuntimeError("Jira base URL must be provided via config or CLI arguments.")
+    if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))):
+        raise RuntimeError("Provide either an API token or both email/password for Jira authentication.")
+    # Forward only the connector options that were actually set; None means "use the connector's default".
+    connector_options = {
+        key: value
+        for key, value in (
+            ("include_comments", config.get("include_comments")),
+            ("include_attachments", config.get("include_attachments")),
+            ("attachment_size_limit", config.get("attachment_size_limit")),
+            ("labels_to_skip", config.get("labels_to_skip")),
+            ("comment_email_blacklist", config.get("comment_email_blacklist")),
+            ("scoped_token", config.get("scoped_token")),
+            ("timezone_offset", config.get("timezone_offset")),
+        )
+        if value is not None
+    }
+
+    documents = test_jira(
+        base_url=base_url,
+        project_key=config.get("project_key"),
+        jql_query=config.get("jql_query"),
+        credentials=credentials,
+        batch_size=config.get("batch_size", INDEX_BATCH_SIZE),
+        start_ts=config.get("start_ts"),
+        end_ts=config.get("end_ts"),
+        connector_options=connector_options,
+    )
+    # Print up to five sample documents, then the total count.
+    preview_count = min(len(documents), 5)
+    for idx in range(preview_count):
+        doc = documents[idx]
+        print(f"[Jira] [Sample {idx + 1}] {doc.semantic_identifier} | id={doc.id} | size={doc.size_bytes} bytes")
+
+    print(f"[Jira] Jira connector test completed. Documents fetched: {len(documents)}")
+
+
+if __name__ == "__main__":  # pragma: no cover - manual execution path
+    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(name)s %(message)s")  # DEBUG-level logging for manual runs
+    main()
diff --git a/common/data_source/jira/utils.py b/common/data_source/jira/utils.py
new file mode 100644
index 000000000..62219d36d
--- /dev/null
+++ b/common/data_source/jira/utils.py
@@ -0,0 +1,149 @@
+"""Helper utilities for the Jira connector."""
+
+from __future__ import annotations
+
+import os
+from collections.abc import Collection
+from datetime import datetime, timezone
+from typing import Any, Iterable
+
+from jira.resources import Issue
+
+from common.data_source.utils import datetime_from_string
+
+JIRA_SERVER_API_VERSION = os.environ.get("JIRA_SERVER_API_VERSION", "2")
+JIRA_CLOUD_API_VERSION = os.environ.get("JIRA_CLOUD_API_VERSION", "3")
+
+
+def build_issue_url(base_url: str, issue_key: str) -> str:
+    """Compose the browser-facing ``<base_url>/browse/<issue_key>`` link for an issue."""
+    return "{}/browse/{}".format(base_url.rstrip("/"), issue_key)
+
+
+def parse_jira_datetime(value: Any) -> datetime | None:
+    """Best-effort parse of a Jira datetime; returns None for anything that cannot be parsed."""
+    if isinstance(value, datetime):
+        # Naive datetimes are treated as already being UTC; aware ones are converted.
+        return value.astimezone(timezone.utc) if value.tzinfo else value.replace(tzinfo=timezone.utc)
+    try:
+        return datetime_from_string(value) if isinstance(value, str) else None
+    except ValueError:  # malformed timestamp text must not abort the caller
+        return None
+
+
+def extract_named_value(value: Any) -> str | None:
+    """Pull a readable label from a str, a dict (``name`` then ``value``) or an object with ``.name``."""
+    if isinstance(value, str):
+        return value
+    if isinstance(value, dict):
+        named = value.get("name")
+        return named if named else value.get("value")
+    # None and objects lacking a ``name`` attribute both resolve to None here.
+    return getattr(value, "name", None)
+
+
+def extract_user(value: Any) -> tuple[str | None, str | None]:
+    """Return ``(displayName, emailAddress)`` for a Jira user given as a dict or an object."""
+    if value is None:
+        return None, None
+
+    def _field(key: str) -> Any:
+        # Dict payloads are read by key, anything else by attribute.
+        return value.get(key) if isinstance(value, dict) else getattr(value, key, None)
+
+    return _field("displayName"), _field("emailAddress")
+
+
+def extract_text_from_adf(adf: Any) -> str:
+    """Flatten an Atlassian Document Format (ADF) tree into newline-joined text.
+
+    Walks dicts/lists depth-first in document order and collects the ``text`` of
+    every ``type == "text"`` node; any other value (None, scalars) is ignored.
+    """
+    texts: list[str] = []
+    pending: list[Any] = [adf]
+
+    while pending:
+        node = pending.pop()
+        if isinstance(node, dict):
+            if node.get("type") == "text":
+                texts.append(node.get("text") or "")
+            node = node.get("content") or []  # tolerate an explicit ``"content": null``
+        if isinstance(node, list):
+            pending.extend(reversed(node))  # reversed so children pop in document order
+
+    return "\n".join(part for part in texts if part)
+
+
+def extract_body_text(value: Any) -> str:
+    """Reduce a Jira description/comment body (None, str, ADF dict or other) to stripped text."""
+    if value is None:
+        return ""
+    if isinstance(value, dict):
+        flattened = extract_text_from_adf(value)
+    else:
+        flattened = value if isinstance(value, str) else str(value)
+    return flattened.strip()
+
+
+def format_comments(
+    comment_block: Any,
+    *,
+    blacklist: Collection[str],
+) -> str:
+    """Render a Jira ``comment`` field as blank-line separated ``- author (time):`` entries.
+
+    Comments written by a blacklisted email (case-insensitive) or having an empty
+    body are left out; a ``comment_block`` that is not a dict yields "".
+    """
+    if not isinstance(comment_block, dict):
+        return ""
+
+    entries = comment_block.get("comments") or []
+    ignored_emails = {address.lower() for address in blacklist if address}
+    rendered: list[str] = []
+    for entry in entries:
+        author_info = entry.get("author") or {}
+        email = (author_info.get("emailAddress") or "").lower()
+        if email and email in ignored_emails:
+            continue
+        who = author_info.get("displayName") or author_info.get("name") or email or "Unknown"
+        timestamp = parse_jira_datetime(entry.get("created"))
+        when = "Unknown time" if not timestamp else timestamp.isoformat()
+        text = extract_body_text(entry.get("body"))
+        if text:
+            rendered.append(f"- {who} ({when}):\n{text}")
+
+    return "\n\n".join(rendered)
+
+
+def format_attachments(attachments: Any) -> str:
+    """Render attachment metadata as ``- name (N bytes) -> url`` lines; "" for non-list input."""
+    if not isinstance(attachments, list):
+        return ""
+
+    def _describe(item: Any) -> str | None:
+        name = item.get("filename")
+        if not name:
+            return None  # entries without a filename are not listed
+        byte_count = item.get("size")
+        link = item.get("content") or ""
+        suffix = f" ({byte_count} bytes)" if isinstance(byte_count, int) else ""
+        if link:
+            suffix += f" -> {link}"
+        return f"- {name}{suffix}"
+
+    return "\n".join(line for line in map(_describe, attachments) if line is not None)
+
+
+def should_skip_issue(issue: Issue, labels_to_skip: set[str]) -> bool:
+    """Return True if the issue carries any label from the skip list (case-insensitive).
+
+    Both sides are lower-cased, so callers need not pre-normalize ``labels_to_skip``.
+    """
+    if not labels_to_skip:
+        return False
+    skip = {str(label).lower() for label in labels_to_skip if label}
+    fields = (getattr(issue, "raw", None) or {}).get("fields") or {}
+    labels: Iterable[str] = fields.get("labels") or []
+    return any(str(label or "").lower() in skip for label in labels)
diff --git a/common/data_source/jira_connector.py b/common/data_source/jira_connector.py
deleted file mode 100644
index 4d6f1160e..000000000
--- a/common/data_source/jira_connector.py
+++ /dev/null
@@ -1,112 +0,0 @@
-"""Jira connector"""
-
-from typing import Any
-
-from jira import JIRA
-
-from common.data_source.config import INDEX_BATCH_SIZE
-from common.data_source.exceptions import (
-    ConnectorValidationError,
-    InsufficientPermissionsError,
-    UnexpectedValidationError, ConnectorMissingCredentialError
-)
-from common.data_source.interfaces import (
-    CheckpointedConnectorWithPermSync,
-    SecondsSinceUnixEpoch,
-    SlimConnectorWithPermSync
-)
-from common.data_source.models import (
-    ConnectorCheckpoint
-)
-
-
-class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync):
-    """Jira connector for accessing Jira issues and projects"""
-
-    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
-        self.batch_size = batch_size
-        self.jira_client: JIRA | None = None
-
-    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        """Load Jira credentials"""
-        try:
-            url = credentials.get("url")
-            username = credentials.get("username")
-            password = credentials.get("password")
-            token = credentials.get("token")
-            
-            if not url:
-                raise ConnectorMissingCredentialError("Jira URL is required")
-            
-            if token:
-                # API token authentication
-                self.jira_client = JIRA(server=url, token_auth=token)
-            elif username and password:
-                # Basic authentication
-                self.jira_client = JIRA(server=url, basic_auth=(username, password))
-            else:
-                raise ConnectorMissingCredentialError("Jira credentials are incomplete")
-            
-            return None
-        except Exception as e:
-            raise ConnectorMissingCredentialError(f"Jira: {e}")
-
-    def validate_connector_settings(self) -> None:
-        """Validate Jira connector settings"""
-        if not self.jira_client:
-            raise ConnectorMissingCredentialError("Jira")
-        
-        try:
-            # Test connection by getting server info
-            self.jira_client.server_info()
-        except Exception as e:
-            if "401" in str(e) or "403" in str(e):
-                raise InsufficientPermissionsError("Invalid credentials or insufficient permissions")
-            elif "404" in str(e):
-                raise ConnectorValidationError("Jira instance not found")
-            else:
-                raise UnexpectedValidationError(f"Jira validation error: {e}")
-
-    def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any:
-        """Poll Jira for recent issues"""
-        # Simplified implementation - in production this would handle actual polling
-        return []
-
-    def load_from_checkpoint(
-        self,
-        start: SecondsSinceUnixEpoch,
-        end: SecondsSinceUnixEpoch,
-        checkpoint: ConnectorCheckpoint,
-    ) -> Any:
-        """Load documents from checkpoint"""
-        # Simplified implementation
-        return []
-
-    def load_from_checkpoint_with_perm_sync(
-        self,
-        start: SecondsSinceUnixEpoch,
-        end: SecondsSinceUnixEpoch,
-        checkpoint: ConnectorCheckpoint,
-    ) -> Any:
-        """Load documents from checkpoint with permission sync"""
-        # Simplified implementation
-        return []
-
-    def build_dummy_checkpoint(self) -> ConnectorCheckpoint:
-        """Build dummy checkpoint"""
-        return ConnectorCheckpoint()
-
-    def validate_checkpoint_json(self, checkpoint_json: str) -> ConnectorCheckpoint:
-        """Validate checkpoint JSON"""
-        # Simplified implementation
-        return ConnectorCheckpoint()
-
-    def retrieve_all_slim_docs_perm_sync(
-        self,
-        start: SecondsSinceUnixEpoch | None = None,
-        end: SecondsSinceUnixEpoch | None = None,
-        callback: Any = None,
-    ) -> Any:
-        """Retrieve all simplified documents with permission sync"""
-        # Simplified implementation
-        return []
\ No newline at end of file
diff --git a/common/data_source/utils.py b/common/data_source/utils.py
index 7c2cdf898..b42c3833b 100644
--- a/common/data_source/utils.py
+++ b/common/data_source/utils.py
@@ -48,17 +48,35 @@ from common.data_source.exceptions import RateLimitTriedTooManyTimesError
 from common.data_source.interfaces import CT, CheckpointedConnector, CheckpointOutputWrapper, ConfluenceUser, LoadFunction, OnyxExtensionType, SecondsSinceUnixEpoch, TokenResponse
 from common.data_source.models import BasicExpertInfo, Document
 
+_TZ_SUFFIX_PATTERN = re.compile(r"([+-])([\d:]+)$")
+
 
 def datetime_from_string(datetime_string: str) -> datetime:
     datetime_string = datetime_string.strip()
 
+    match_jira_format = _TZ_SUFFIX_PATTERN.search(datetime_string)
+    if match_jira_format:
+        sign, tz_field = match_jira_format.groups()
+        digits = tz_field.replace(":", "")
+
+        if digits.isdigit() and 1 <= len(digits) <= 4:
+            if len(digits) >= 3:
+                hours = digits[:-2].rjust(2, "0")
+                minutes = digits[-2:]
+            else:
+                hours = digits.rjust(2, "0")
+                minutes = "00"
+
+            normalized = f"{sign}{hours}:{minutes}"
+            datetime_string = f"{datetime_string[: match_jira_format.start()]}{normalized}"
+
     # Handle the case where the datetime string ends with 'Z' (Zulu time)
-    if datetime_string.endswith('Z'):
-        datetime_string = datetime_string[:-1] + '+00:00'
+    if datetime_string.endswith("Z"):
+        datetime_string = datetime_string[:-1] + "+00:00"
 
     # Handle timezone format "+0000" -> "+00:00"
-    if datetime_string.endswith('+0000'):
-        datetime_string = datetime_string[:-5] + '+00:00'
+    if datetime_string.endswith("+0000"):
+        datetime_string = datetime_string[:-5] + "+00:00"
 
     datetime_object = datetime.fromisoformat(datetime_string)
 
@@ -480,7 +498,7 @@ def get_file_ext(file_name: str) -> str:
 
 
 def is_accepted_file_ext(file_ext: str, extension_type: OnyxExtensionType) -> bool:
-    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
+    image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}
     text_extensions = {".txt", ".md", ".mdx", ".conf", ".log", ".json", ".csv", ".tsv", ".xml", ".yml", ".yaml", ".sql"}
     document_extensions = {".pdf", ".docx", ".pptx", ".xlsx", ".eml", ".epub", ".html"}
 
@@ -902,6 +920,18 @@ def load_all_docs_from_checkpoint_connector(
     )
 
 
+_ATLASSIAN_CLOUD_DOMAINS = (".atlassian.net", ".jira.com", ".jira-dev.com")
+
+
+def is_atlassian_cloud_url(url: str) -> bool:
+    """Return True when the URL's hostname ends with a known Atlassian Cloud domain suffix."""
+    try:
+        hostname = urlparse(url).hostname
+    except ValueError:
+        return False
+    return (hostname or "").lower().endswith(_ATLASSIAN_CLOUD_DOMAINS)
+
+
 def get_cloudId(base_url: str) -> str:
     tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
     response = requests.get(tenant_info_url, timeout=10)
diff --git a/common/log_utils.py b/common/log_utils.py
index e2110ebeb..abbcd286b 100644
--- a/common/log_utils.py
+++ b/common/log_utils.py
@@ -80,4 +80,4 @@ def log_exception(e, *args):
             raise Exception(a.text)
         else:
             logging.error(str(a))
-    raise e
\ No newline at end of file
+    raise e
diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py
index 3dc9a7a3c..6925eb5f7 100644
--- a/rag/svr/sync_data_source.py
+++ b/rag/svr/sync_data_source.py
@@ -20,33 +20,40 @@
 
 
 import copy
+import faulthandler
+import logging
+import os
+import signal
 import sys
 import threading
 import time
 import traceback
+from datetime import datetime, timezone
 from typing import Any
 
+import trio
+
 from api.db.services.connector_service import ConnectorService, SyncLogsService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from common.log_utils import init_root_logger
-from common.config_utils import show_configs
-from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector
-import logging
-import os
-from datetime import datetime, timezone
-import signal
-import trio
-import faulthandler
-from common.constants import FileSource, TaskStatus
 from common import settings
-from common.versions import get_ragflow_version
+from common.config_utils import show_configs
+from common.constants import FileSource, TaskStatus
+from common.data_source import (
+    BlobStorageConnector,
+    DiscordConnector,
+    GoogleDriveConnector,
+    JiraConnector,
+    NotionConnector,
+)
+from common.data_source.config import INDEX_BATCH_SIZE
 from common.data_source.confluence_connector import ConfluenceConnector
 from common.data_source.interfaces import CheckpointOutputWrapper
 from common.data_source.utils import load_all_docs_from_checkpoint_connector
-from common.data_source.config import INDEX_BATCH_SIZE
+from common.log_utils import init_root_logger
 from common.signal_utils import start_tracemalloc_and_snapshot, stop_tracemalloc
+from common.versions import get_ragflow_version
 
-MAX_CONCURRENT_TASKS = int(os.environ.get('MAX_CONCURRENT_TASKS', "5"))
+MAX_CONCURRENT_TASKS = int(os.environ.get("MAX_CONCURRENT_TASKS", "5"))
 task_limiter = trio.Semaphore(MAX_CONCURRENT_TASKS)
 
 
@@ -72,31 +79,32 @@ class SyncBase:
                         min_update = min([doc.doc_updated_at for doc in document_batch])
                         max_update = max([doc.doc_updated_at for doc in document_batch])
                         next_update = max([next_update, max_update])
-                        docs = [{
-                            "id": doc.id,
-                            "connector_id": task["connector_id"],
-                            "source": self.SOURCE_NAME,
-                            "semantic_identifier": doc.semantic_identifier,
-                            "extension": doc.extension,
-                            "size_bytes": doc.size_bytes,
-                            "doc_updated_at": doc.doc_updated_at,
-                            "blob": doc.blob
-                        } for doc in document_batch]
+                        docs = [
+                            {
+                                "id": doc.id,
+                                "connector_id": task["connector_id"],
+                                "source": self.SOURCE_NAME,
+                                "semantic_identifier": doc.semantic_identifier,
+                                "extension": doc.extension,
+                                "size_bytes": doc.size_bytes,
+                                "doc_updated_at": doc.doc_updated_at,
+                                "blob": doc.blob,
+                            }
+                            for doc in document_batch
+                        ]
 
                         e, kb = KnowledgebaseService.get_by_id(task["kb_id"])
                         err, dids = SyncLogsService.duplicate_and_parse(kb, docs, task["tenant_id"], f"{self.SOURCE_NAME}/{task['connector_id']}", task["auto_parse"])
                         SyncLogsService.increase_docs(task["id"], min_update, max_update, len(docs), "\n".join(err), len(err))
                         doc_num += len(docs)
 
-                    logging.info("{} docs synchronized till {}".format(doc_num, next_update))
+                    prefix = "[Jira] " if self.SOURCE_NAME == FileSource.JIRA else ""
+                    logging.info(f"{prefix}{doc_num} docs synchronized till {next_update}")
                     SyncLogsService.done(task["id"], task["connector_id"])
                     task["poll_range_start"] = next_update
 
         except Exception as ex:
-            msg = '\n'.join([
-                ''.join(traceback.format_exception_only(None, ex)).strip(),
-                ''.join(traceback.format_exception(None, ex, ex.__traceback__)).strip()
-            ])
+            msg = "\n".join(["".join(traceback.format_exception_only(None, ex)).strip(), "".join(traceback.format_exception(None, ex, ex.__traceback__)).strip()])
             SyncLogsService.update_by_id(task["id"], {"status": TaskStatus.FAIL, "full_exception_trace": msg, "error_msg": str(ex)})
 
         SyncLogsService.schedule(task["connector_id"], task["kb_id"], task["poll_range_start"])
@@ -109,21 +117,16 @@ class S3(SyncBase):
     SOURCE_NAME: str = FileSource.S3
 
     async def _generate(self, task: dict):
-        self.connector = BlobStorageConnector(
-            bucket_type=self.conf.get("bucket_type", "s3"),
-            bucket_name=self.conf["bucket_name"],
-            prefix=self.conf.get("prefix", "")
-        )
+        self.connector = BlobStorageConnector(bucket_type=self.conf.get("bucket_type", "s3"), bucket_name=self.conf["bucket_name"], prefix=self.conf.get("prefix", ""))
         self.connector.load_credentials(self.conf["credentials"])
-        document_batch_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \
-            else  self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
+        document_batch_generator = (
+            self.connector.load_from_state()
+            if task["reindex"] == "1" or not task["poll_range_start"]
+            else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
+        )
 
-        begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
-        logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"),
-                                                                  self.conf["bucket_name"],
-                                                                  self.conf.get("prefix", ""),
-                                                                  begin_info
-                                                                  ))
+        begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
+        logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"), self.conf["bucket_name"], self.conf.get("prefix", ""), begin_info))
         return document_batch_generator
 
 
@@ -131,8 +134,8 @@ class Confluence(SyncBase):
     SOURCE_NAME: str = FileSource.CONFLUENCE
 
     async def _generate(self, task: dict):
-        from common.data_source.interfaces import StaticCredentialsProvider
         from common.data_source.config import DocumentSource
+        from common.data_source.interfaces import StaticCredentialsProvider
 
         self.connector = ConfluenceConnector(
             wiki_base=self.conf["wiki_base"],
@@ -141,11 +144,7 @@ class Confluence(SyncBase):
             # page_id=self.conf.get("page_id", ""),
         )
 
-        credentials_provider = StaticCredentialsProvider(
-            tenant_id=task["tenant_id"],
-            connector_name=DocumentSource.CONFLUENCE,
-            credential_json=self.conf["credentials"]
-        )
+        credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"], connector_name=DocumentSource.CONFLUENCE, credential_json=self.conf["credentials"])
         self.connector.set_credentials_provider(credentials_provider)
 
         # Determine the time range for synchronization based on reindex or poll_range_start
@@ -174,10 +173,13 @@ class Notion(SyncBase):
     async def _generate(self, task: dict):
         self.connector = NotionConnector(root_page_id=self.conf["root_page_id"])
         self.connector.load_credentials(self.conf["credentials"])
-        document_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \
-            else  self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
+        document_generator = (
+            self.connector.load_from_state()
+            if task["reindex"] == "1" or not task["poll_range_start"]
+            else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
+        )
 
-        begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
+        begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
         logging.info("Connect to Notion: root({}) {}".format(self.conf["root_page_id"], begin_info))
         return document_generator
 
@@ -194,13 +196,16 @@ class Discord(SyncBase):
             server_ids=server_ids.split(",") if server_ids else [],
             channel_names=channel_names.split(",") if channel_names else [],
             start_date=datetime(1970, 1, 1, tzinfo=timezone.utc).strftime("%Y-%m-%d"),
-            batch_size=self.conf.get("batch_size", 1024)
+            batch_size=self.conf.get("batch_size", 1024),
         )
         self.connector.load_credentials(self.conf["credentials"])
-        document_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \
-            else  self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
+        document_generator = (
+            self.connector.load_from_state()
+            if task["reindex"] == "1" or not task["poll_range_start"]
+            else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp())
+        )
 
-        begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
+        begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"])
         logging.info("Connect to Discord: servers({}),  channel({}) {}".format(server_ids, channel_names, begin_info))
         return document_generator
 
@@ -285,7 +290,7 @@ class GoogleDrive(SyncBase):
             admin_email = self.connector.primary_admin_email
         except RuntimeError:
             admin_email = "unknown"
-        logging.info("Connect to Google Drive as %s %s", admin_email, begin_info)
+        logging.info(f"Connect to Google Drive as {admin_email} {begin_info}")
         return document_batches()
 
     def _persist_rotated_credentials(self, connector_id: str, credentials: dict[str, Any]) -> None:
@@ -303,7 +308,93 @@ class Jira(SyncBase):
     SOURCE_NAME: str = FileSource.JIRA
 
     async def _generate(self, task: dict):
-        pass
+        connector_kwargs = {
+            "jira_base_url": self.conf["base_url"],
+            "project_key": self.conf.get("project_key"),
+            "jql_query": self.conf.get("jql_query"),
+            "batch_size": self.conf.get("batch_size", INDEX_BATCH_SIZE),
+            "include_comments": self.conf.get("include_comments", True),
+            "include_attachments": self.conf.get("include_attachments", False),
+            "labels_to_skip": self._normalize_list(self.conf.get("labels_to_skip")),
+            "comment_email_blacklist": self._normalize_list(self.conf.get("comment_email_blacklist")),
+            "scoped_token": self.conf.get("scoped_token", False),
+            "attachment_size_limit": self.conf.get("attachment_size_limit"),
+            "timezone_offset": self.conf.get("timezone_offset"),
+        }
+        # Build the connector from the stored configuration assembled above.
+        self.connector = JiraConnector(**connector_kwargs)
+
+        credentials = self.conf.get("credentials")
+        if not credentials:
+            raise ValueError("Jira connector is missing credentials.")
+        # Load the credentials, then validate the settings before any issue is fetched.
+        self.connector.load_credentials(credentials)
+        self.connector.validate_connector_settings()
+        # A reindex request or a missing poll_range_start starts from timestamp 0.0; otherwise resume from the last poll.
+        if task["reindex"] == "1" or not task["poll_range_start"]:
+            start_time = 0.0
+            begin_info = "totally"
+        else:
+            start_time = task["poll_range_start"].timestamp()
+            begin_info = f"from {task['poll_range_start']}"
+
+        end_time = datetime.now(timezone.utc).timestamp()
+        # Size of the batches yielded below: sync_batch_size, else batch_size, else INDEX_BATCH_SIZE; unparsable or non-positive values fall back to INDEX_BATCH_SIZE.
+        raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE
+        try:
+            batch_size = int(raw_batch_size)
+        except (TypeError, ValueError):
+            batch_size = INDEX_BATCH_SIZE
+        if batch_size <= 0:
+            batch_size = INDEX_BATCH_SIZE
+
+        def document_batches():
+            checkpoint = self.connector.build_dummy_checkpoint()
+            pending_docs = []
+            iterations = 0
+            iteration_limit = 100_000
+            # Call load_from_checkpoint again with the latest checkpoint for as long as it reports has_more.
+            while checkpoint.has_more:
+                wrapper = CheckpointOutputWrapper()
+                generator = wrapper(
+                    self.connector.load_from_checkpoint(
+                        start_time,
+                        end_time,
+                        checkpoint,
+                    )
+                )
+                for document, failure, next_checkpoint in generator:
+                    if failure is not None:
+                        logging.warning(
+                            f"[Jira] Jira connector failure: {getattr(failure, 'failure_message', failure)}"
+                        )
+                        continue
+                    if document is not None:
+                        pending_docs.append(document)
+                        if len(pending_docs) >= batch_size:
+                            yield pending_docs
+                            pending_docs = []
+                    if next_checkpoint is not None:
+                        checkpoint = next_checkpoint
+                # Cap the number of checkpoint rounds so this loop cannot spin forever.
+                iterations += 1
+                if iterations > iteration_limit:
+                    logging.error(f"[Jira] Task {task.get('id')} exceeded iteration limit ({iteration_limit}).")
+                    raise RuntimeError("Too many iterations while loading Jira documents.")
+            # Flush the final, partially filled batch.
+            if pending_docs:
+                yield pending_docs
+
+        logging.info(f"[Jira] Connect to Jira {connector_kwargs['jira_base_url']} {begin_info}")
+        return document_batches()
+
+    @staticmethod
+    def _normalize_list(values: Any) -> list[str] | None:
+        """Coerce a comma-separated string or an iterable into trimmed, non-empty strings (None stays None)."""
+        if values is None:
+            return None
+        candidates = values.split(",") if isinstance(values, str) else values
+        return [text for text in (str(item).strip() for item in candidates if item is not None) if text]
 
 
 class SharePoint(SyncBase):
@@ -337,9 +428,10 @@ func_factory = {
     FileSource.JIRA: Jira,
     FileSource.SHAREPOINT: SharePoint,
     FileSource.SLACK: Slack,
-    FileSource.TEAMS: Teams
+    FileSource.TEAMS: Teams,
 }
 
+
 async def dispatch_tasks():
     async with trio.open_nursery() as nursery:
         while True:
@@ -385,7 +477,7 @@ async def main():
                                   __/ |
                                  |___/
     """)
-    logging.info(f'RAGFlow version: {get_ragflow_version()}')
+    logging.info(f"RAGFlow version: {get_ragflow_version()}")
     show_configs()
     settings.init_settings()
     if sys.platform != "win32":
diff --git a/web/src/assets/svg/data-source/jira.svg b/web/src/assets/svg/data-source/jira.svg
new file mode 100644
index 000000000..8f9cd8b97
--- /dev/null
+++ b/web/src/assets/svg/data-source/jira.svg
@@ -0,0 +1,16 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="128" height="128" viewBox="0 0 128 128">
+  <defs>
+    <linearGradient id="jira-a" x1="88.136" y1="39.092" x2="68.472" y2="59.368" gradientUnits="userSpaceOnUse">
+      <stop offset="0.176" stop-color="#0052cc"/>
+      <stop offset="1" stop-color="#2684ff"/>
+    </linearGradient>
+    <linearGradient id="jira-b" x1="66.564" y1="62.256" x2="43.828" y2="84.376" gradientUnits="userSpaceOnUse">
+      <stop offset="0.176" stop-color="#0052cc"/>
+      <stop offset="1" stop-color="#2684ff"/>
+    </linearGradient>
+  </defs>
+
+  <path d="M108.023 16H61.805c0 11.52 9.324 20.848 20.847 20.848h8.5v8.226c0 11.52 9.328 20.848 20.848 20.848V19.977A3.98 3.98 0 00108.023 16z" fill="#2684ff"/>
+  <path d="M85.121 39.04H38.902c0 11.519 9.325 20.847 20.844 20.847h8.504v8.226c0 11.52 9.328 20.848 20.848 20.848V43.016a3.983 3.983 0 00-3.977-3.977z" fill="url(#jira-a)"/>
+  <path d="M62.219 62.078H16c0 11.524 9.324 20.848 20.848 20.848h8.5v8.23c0 11.52 9.328 20.844 20.847 20.844V66.059a3.984 3.984 0 00-3.976-3.98z" fill="url(#jira-b)"/>
+</svg>
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index e2035a378..350a64db8 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -732,6 +732,33 @@ Example: general/v2/`,
         'Comma-separated emails whose “My Drive” contents should be indexed (include the primary admin).',
       google_driveSharedFoldersTip:
         'Comma-separated Google Drive folder links to crawl.',
+      jiraDescription:
+        'Connect your Jira workspace to sync issues, comments, and attachments.',
+      jiraBaseUrlTip:
+        'Base URL of your Jira site (e.g., https://your-domain.atlassian.net).',
+      jiraProjectKeyTip:
+        'Optional: limit syncing to a single project key (e.g., ENG).',
+      jiraJqlTip:
+        'Optional JQL filter. Leave blank to rely on project/time filters.',
+      jiraBatchSizeTip:
+        'Maximum number of issues requested from Jira per batch.',
+      jiraCommentsTip:
+        'Include Jira comments in the generated markdown document.',
+      jiraAttachmentsTip:
+        'Download attachments as separate documents during sync.',
+      jiraAttachmentSizeTip:
+        'Attachments larger than this number of bytes will be skipped.',
+      jiraLabelsTip:
+        'Labels that should be skipped while indexing (comma separated).',
+      jiraBlacklistTip:
+        'Comments whose author email matches these entries will be ignored.',
+      jiraScopedTokenTip:
+        'Enable this when using scoped Atlassian tokens (api.atlassian.com).',
+      jiraEmailTip: 'Email associated with the Jira account/API token.',
+      jiraTokenTip:
+        'API token generated from https://id.atlassian.com/manage-profile/security/api-tokens.',
+      jiraPasswordTip:
+        'Optional password for Jira Server/Data Center environments.',
       availableSourcesDescription: 'Select a data source to add',
       availableSources: 'Available sources',
       datasourceDescription: 'Manage your data source and connections',
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index 301719117..b6d25dc1f 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -716,6 +716,23 @@ General：实体和关系提取提示来自 GitHub - microsoft/graphrag：基于
         '需要索引其 “我的云端硬盘” 的邮箱，多个邮箱用逗号分隔（建议包含管理员）。',
       google_driveSharedFoldersTip:
         '需要同步的 Google Drive 文件夹链接，多个链接用逗号分隔。',
+      jiraDescription: '接入 Jira 工作区，持续同步Issues、评论与附件。',
+      jiraBaseUrlTip:
+        'Jira 的 Base URL，例如：https://your-domain.atlassian.net。',
+      jiraProjectKeyTip: '可选：仅同步指定的项目（如 RAG）。',
+      jiraJqlTip: '可选：自定义 JQL 过滤条件，留空则使用项目 / 时间范围。',
+      jiraBatchSizeTip: '每次向 Jira 请求的 Issue 数量上限。',
+      jiraCommentsTip: '同步评论。',
+      jiraAttachmentsTip: '开启后会将附件下载为独立文档。',
+      jiraAttachmentSizeTip: '超过该字节阈值的附件会被跳过。',
+      jiraLabelsTip: '需要跳过的标签（逗号分隔）。',
+      jiraBlacklistTip: '这些邮箱作者的评论会被忽略。',
+      jiraScopedTokenTip:
+        '仅当凭证为 Atlassian scoped token（api.atlassian.com）时生效。',
+      jiraEmailTip: '与 API Token 对应的 Jira 账户邮箱。',
+      jiraTokenTip:
+        '在 https://id.atlassian.com/manage-profile/security/api-tokens 生成的 API Token。 (Cloud only)',
+      jiraPasswordTip: '可选：仅 Jira Server/Data Center 环境需要的密码字段。',
       availableSourcesDescription: '选择要添加的数据源',
       availableSources: '可用数据源',
       datasourceDescription: '管理您的数据源和连接',
diff --git a/web/src/pages/user-setting/data-source/contant.tsx b/web/src/pages/user-setting/data-source/contant.tsx
index 7acf3036d..3c8c55826 100644
--- a/web/src/pages/user-setting/data-source/contant.tsx
+++ b/web/src/pages/user-setting/data-source/contant.tsx
@@ -9,8 +9,8 @@ export enum DataSourceKey {
   NOTION = 'notion',
   DISCORD = 'discord',
   GOOGLE_DRIVE = 'google_drive',
-  //   GMAIL = 'gmail',
-  //   JIRA = 'jira',
+  // GMAIL = 'gmail',
+  JIRA = 'jira',
   //   SHAREPOINT = 'sharepoint',
   //   SLACK = 'slack',
   //   TEAMS = 'teams',
@@ -42,6 +42,11 @@ export const DataSourceInfo = {
     description: t(`setting.${DataSourceKey.GOOGLE_DRIVE}Description`),
     icon: <SvgIcon name={'data-source/google-drive'} width={38} />,
   },
+  [DataSourceKey.JIRA]: {
+    name: 'Jira',
+    description: t(`setting.${DataSourceKey.JIRA}Description`),
+    icon: <SvgIcon name={'data-source/jira'} width={38} />,
+  },
 };
 
 export const DataSourceFormBaseFields = [
@@ -270,6 +275,106 @@ export const DataSourceFormFields = {
       defaultValue: 'uploaded',
     },
   ],
+  [DataSourceKey.JIRA]: [
+    {
+      label: 'Jira Base URL',
+      name: 'config.base_url',
+      type: FormFieldType.Text,
+      required: true,
+      placeholder: 'https://your-domain.atlassian.net',
+      tooltip: t('setting.jiraBaseUrlTip'),
+    },
+    {
+      label: 'Project Key',
+      name: 'config.project_key',
+      type: FormFieldType.Text,
+      required: false,
+      placeholder: 'RAGFlow',
+      tooltip: t('setting.jiraProjectKeyTip'),
+    },
+    {
+      label: 'Custom JQL',
+      name: 'config.jql_query',
+      type: FormFieldType.Textarea,
+      required: false,
+      placeholder: 'project = RAG AND updated >= -7d',
+      tooltip: t('setting.jiraJqlTip'),
+    },
+    {
+      label: 'Batch Size',
+      name: 'config.batch_size',
+      type: FormFieldType.Number,
+      required: false,
+      tooltip: t('setting.jiraBatchSizeTip'),
+    },
+    {
+      label: 'Include Comments',
+      name: 'config.include_comments',
+      type: FormFieldType.Checkbox,
+      required: false,
+      defaultValue: true,
+      tooltip: t('setting.jiraCommentsTip'),
+    },
+    {
+      label: 'Include Attachments',
+      name: 'config.include_attachments',
+      type: FormFieldType.Checkbox,
+      required: false,
+      defaultValue: false,
+      tooltip: t('setting.jiraAttachmentsTip'),
+    },
+    {
+      label: 'Attachment Size Limit (bytes)',
+      name: 'config.attachment_size_limit',
+      type: FormFieldType.Number,
+      required: false,
+      defaultValue: 10 * 1024 * 1024,
+      tooltip: t('setting.jiraAttachmentSizeTip'),
+    },
+    {
+      label: 'Labels to Skip',
+      name: 'config.labels_to_skip',
+      type: FormFieldType.Tag,
+      required: false,
+      tooltip: t('setting.jiraLabelsTip'),
+    },
+    {
+      label: 'Comment Email Blacklist',
+      name: 'config.comment_email_blacklist',
+      type: FormFieldType.Tag,
+      required: false,
+      tooltip: t('setting.jiraBlacklistTip'),
+    },
+    {
+      label: 'Use Scoped Token (Cloud only)',
+      name: 'config.scoped_token',
+      type: FormFieldType.Checkbox,
+      required: false,
+      tooltip: t('setting.jiraScopedTokenTip'),
+    },
+    {
+      label: 'Jira User Email (Cloud) or User Name (Server)',
+      name: 'config.credentials.jira_user_email',
+      type: FormFieldType.Text,
+      required: true,
+      placeholder: 'you@example.com',
+      tooltip: t('setting.jiraEmailTip'),
+    },
+    {
+      label: 'Jira API Token (Cloud only)',
+      name: 'config.credentials.jira_api_token',
+      type: FormFieldType.Password,
+      required: false,
+      tooltip: t('setting.jiraTokenTip'),
+    },
+    {
+      label: 'Jira Password (Server only)',
+      name: 'config.credentials.jira_password',
+      type: FormFieldType.Password,
+      required: false,
+      tooltip: t('setting.jiraPasswordTip'),
+    },
+  ],
   // [DataSourceKey.GOOGLE_DRIVE]: [
   //   {
   //     label: 'Primary Admin Email',
@@ -433,4 +538,25 @@ export const DataSourceFormDefaultValues = {
       },
     },
   },
+  [DataSourceKey.JIRA]: {
+    name: '',
+    source: DataSourceKey.JIRA,
+    config: {
+      base_url: '',
+      project_key: '',
+      jql_query: '',
+      batch_size: 2,
+      include_comments: true,
+      include_attachments: false,
+      attachment_size_limit: 10 * 1024 * 1024,
+      labels_to_skip: [],
+      comment_email_blacklist: [],
+      scoped_token: false,
+      credentials: {
+        jira_user_email: '',
+        jira_api_token: '',
+        jira_password: '',
+      },
+    },
+  },
 };
diff --git a/web/src/pages/user-setting/data-source/index.tsx b/web/src/pages/user-setting/data-source/index.tsx
index 80ceea1d7..9cb58672a 100644
--- a/web/src/pages/user-setting/data-source/index.tsx
+++ b/web/src/pages/user-setting/data-source/index.tsx
@@ -44,6 +44,12 @@ const dataSourceTemplates = [
     description: DataSourceInfo[DataSourceKey.NOTION].description,
     icon: DataSourceInfo[DataSourceKey.NOTION].icon,
   },
+  {
+    id: DataSourceKey.JIRA,
+    name: DataSourceInfo[DataSourceKey.JIRA].name,
+    description: DataSourceInfo[DataSourceKey.JIRA].description,
+    icon: DataSourceInfo[DataSourceKey.JIRA].icon,
+  },
 ];
 const DataSource = () => {
   const { t } = useTranslation();

From e7e89d3ecbf9638865b15f951549875534b62538 Mon Sep 17 00:00:00 2001
From: Billy Bao <newyorkupperbay@gmail.com>
Date: Mon, 17 Nov 2025 11:16:34 +0800
Subject: [PATCH 14/15] Doc: style fix (#11295)

### What problem does this PR solve?

Style fix based on #11283
### Type of change

- [x] Documentation Update
---
 deepdoc/parser/mineru_parser.py   | 2 +-
 docs/guides/accessing_admin_ui.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py
index bb663de0d..6d3b292d0 100644
--- a/deepdoc/parser/mineru_parser.py
+++ b/deepdoc/parser/mineru_parser.py
@@ -434,7 +434,7 @@ class MinerUParser(RAGFlowPdfParser):
                     if not section.strip():
                         section = "FAILED TO PARSE TABLE"
                 case MinerUContentType.IMAGE:
-                    section = "".join(output.get(["image_caption"],[])) + "\n" + "".join(output.get(["image_footnote"],[]))
+                    section = "".join(output.get("image_caption", [])) + "\n" + "".join(output.get("image_footnote", []))
                 case MinerUContentType.EQUATION:
                     section = output["text"]
                 case MinerUContentType.CODE:
diff --git a/docs/guides/accessing_admin_ui.md b/docs/guides/accessing_admin_ui.md
index 23521244b..181cff5ac 100644
--- a/docs/guides/accessing_admin_ui.md
+++ b/docs/guides/accessing_admin_ui.md
@@ -15,7 +15,7 @@ To access the RAGFlow admin UI, append `/admin` to the web UI's address, e.g. `h
 ### Default Credentials
 | Username | Password |
 |----------|----------|
-| admin@ragflow.io   | admin |
+| `admin@ragflow.io`   | `admin` |
 
 ## Admin UI Overview
 

From 9cef3a26250667fa6761dbe9893a194de0c28aef Mon Sep 17 00:00:00 2001
From: chanx <1243304602@qq.com>
Date: Mon, 17 Nov 2025 11:16:55 +0800
Subject: [PATCH 15/15] Fix: Fixed the issue of not being able to select the
 time zone in the user center. (#11298)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

… user center.

### What problem does this PR solve?

Fix: Fixed the issue of not being able to select the time zone in the
user center.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 web/src/components/ui/modal/modal.tsx        |  4 +++
 web/src/pages/user-setting/profile/index.tsx | 30 ++++++--------------
 2 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/web/src/components/ui/modal/modal.tsx b/web/src/components/ui/modal/modal.tsx
index acae6c147..af516b1e6 100644
--- a/web/src/components/ui/modal/modal.tsx
+++ b/web/src/components/ui/modal/modal.tsx
@@ -86,6 +86,9 @@ const Modal: ModalType = ({
     onOk?.();
   }, [onOk, onOpenChange]);
   const handleChange = (open: boolean) => {
+    if (!open && !maskClosable) {
+      return;
+    }
     onOpenChange?.(open);
     console.log('open', open, onOpenChange);
     if (open && !disabled) {
@@ -185,6 +188,7 @@ const Modal: ModalType = ({
                     <button
                       type="button"
                       className="flex h-7 w-7 items-center justify-center rounded-full hover:bg-muted focus-visible:outline-none"
+                      onClick={handleCancel}
                     >
                       {closeIcon}
                     </button>
diff --git a/web/src/pages/user-setting/profile/index.tsx b/web/src/pages/user-setting/profile/index.tsx
index dceb2cdf3..5c2741cf6 100644
--- a/web/src/pages/user-setting/profile/index.tsx
+++ b/web/src/pages/user-setting/profile/index.tsx
@@ -13,13 +13,7 @@ import {
 } from '@/components/ui/form';
 import { Input } from '@/components/ui/input';
 import { Modal } from '@/components/ui/modal/modal';
-import {
-  Select,
-  SelectContent,
-  SelectItem,
-  SelectTrigger,
-  SelectValue,
-} from '@/components/ui/select';
+import { RAGFlowSelect } from '@/components/ui/select';
 import { useTranslate } from '@/hooks/common-hooks';
 import { TimezoneList } from '@/pages/user-setting/constants';
 import { zodResolver } from '@hookform/resolvers/zod';
@@ -230,6 +224,7 @@ const ProfilePage: FC = () => {
           title={modalTitle[editType]}
           open={isEditing}
           showfooter={false}
+          maskClosable={false}
           titleClassName="text-base"
           onOpenChange={(open) => {
             if (!open) {
@@ -281,23 +276,14 @@ const ProfilePage: FC = () => {
                         <FormLabel className="text-sm text-text-secondary whitespace-nowrap">
                           {t('timezone')}
                         </FormLabel>
-                        <Select
+                        <RAGFlowSelect
+                          options={TimezoneList.map((timeStr) => {
+                            return { value: timeStr, label: timeStr };
+                          })}
+                          placeholder="Select a timeZone"
                           onValueChange={field.onChange}
                           value={field.value}
-                        >
-                          <FormControl className="w-full bg-bg-input border-border-default">
-                            <SelectTrigger>
-                              <SelectValue placeholder="Select a timeZone" />
-                            </SelectTrigger>
-                          </FormControl>
-                          <SelectContent>
-                            {TimezoneList.map((timeStr) => (
-                              <SelectItem key={timeStr} value={timeStr}>
-                                {timeStr}
-                              </SelectItem>
-                            ))}
-                          </SelectContent>
-                        </Select>
+                        />
                       </div>
                       <div className="flex w-full pt-1">
                         <div className="w-1/4"></div>