From 87e69868c0b1c817f2fd51f30a2737e6020a728b Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Fri, 14 Nov 2025 13:56:56 +0800 Subject: [PATCH 01/15] Fixes: Added session variable types and modified configuration (#11269) ### What problem does this PR solve? Fixes: Added session variable types and modified configuration - Added more types of session variables - Modified the embedding model switching logic in the knowledge base configuration ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/components/dynamic-form.tsx | 152 ++++++++--- web/src/components/ui/segmented.tsx | 5 +- web/src/locales/en.ts | 1 + web/src/locales/zh.ts | 1 + .../component/add-variable-modal.tsx | 134 ++++++++++ .../{contant.ts => constant.ts} | 26 +- .../gobal-variable-sheet/hooks/use-form.tsx | 41 +++ .../hooks/use-object-fields.tsx | 246 ++++++++++++++++++ .../agent/gobal-variable-sheet/index.tsx | 188 ++++--------- web/src/pages/agent/hooks/use-build-dsl.ts | 10 +- web/src/pages/agent/hooks/use-save-graph.ts | 2 +- web/src/pages/agent/index.tsx | 18 +- web/src/pages/agent/utils.ts | 24 +- .../configuration/common-item.tsx | 46 +++- .../dataset/dataset-setting/general-form.tsx | 2 +- .../pages/dataset/dataset-setting/hooks.ts | 21 ++ web/src/services/knowledge-service.ts | 6 + web/src/utils/api.ts | 2 + 18 files changed, 712 insertions(+), 213 deletions(-) create mode 100644 web/src/pages/agent/gobal-variable-sheet/component/add-variable-modal.tsx rename web/src/pages/agent/gobal-variable-sheet/{contant.ts => constant.ts} (72%) create mode 100644 web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx create mode 100644 web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx diff --git a/web/src/components/dynamic-form.tsx b/web/src/components/dynamic-form.tsx index f7449ec9f..a90afe287 100644 --- a/web/src/components/dynamic-form.tsx +++ b/web/src/components/dynamic-form.tsx @@ -61,6 +61,12 @@ export interface 
FormFieldConfig { horizontal?: boolean; onChange?: (value: any) => void; tooltip?: React.ReactNode; + customValidate?: ( + value: any, + formValues: any, + ) => string | boolean | Promise; + dependencies?: string[]; + schema?: ZodSchema; } // Component props interface @@ -94,36 +100,40 @@ const generateSchema = (fields: FormFieldConfig[]): ZodSchema => { let fieldSchema: ZodSchema; // Create base validation schema based on field type - switch (field.type) { - case FormFieldType.Email: - fieldSchema = z.string().email('Please enter a valid email address'); - break; - case FormFieldType.Number: - fieldSchema = z.coerce.number(); - if (field.validation?.min !== undefined) { - fieldSchema = (fieldSchema as z.ZodNumber).min( - field.validation.min, - field.validation.message || - `Value cannot be less than ${field.validation.min}`, - ); - } - if (field.validation?.max !== undefined) { - fieldSchema = (fieldSchema as z.ZodNumber).max( - field.validation.max, - field.validation.message || - `Value cannot be greater than ${field.validation.max}`, - ); - } - break; - case FormFieldType.Checkbox: - fieldSchema = z.boolean(); - break; - case FormFieldType.Tag: - fieldSchema = z.array(z.string()); - break; - default: - fieldSchema = z.string(); - break; + if (field.schema) { + fieldSchema = field.schema; + } else { + switch (field.type) { + case FormFieldType.Email: + fieldSchema = z.string().email('Please enter a valid email address'); + break; + case FormFieldType.Number: + fieldSchema = z.coerce.number(); + if (field.validation?.min !== undefined) { + fieldSchema = (fieldSchema as z.ZodNumber).min( + field.validation.min, + field.validation.message || + `Value cannot be less than ${field.validation.min}`, + ); + } + if (field.validation?.max !== undefined) { + fieldSchema = (fieldSchema as z.ZodNumber).max( + field.validation.max, + field.validation.message || + `Value cannot be greater than ${field.validation.max}`, + ); + } + break; + case FormFieldType.Checkbox: + 
fieldSchema = z.boolean(); + break; + case FormFieldType.Tag: + fieldSchema = z.array(z.string()); + break; + default: + fieldSchema = z.string(); + break; + } } // Handle required fields @@ -300,10 +310,90 @@ const DynamicForm = { // Initialize form const form = useForm({ - resolver: zodResolver(schema), + resolver: async (data, context, options) => { + const zodResult = await zodResolver(schema)(data, context, options); + + let combinedErrors = { ...zodResult.errors }; + + const fieldErrors: Record = + {}; + for (const field of fields) { + if (field.customValidate && data[field.name] !== undefined) { + try { + const result = await field.customValidate( + data[field.name], + data, + ); + if (typeof result === 'string') { + fieldErrors[field.name] = { + type: 'custom', + message: result, + }; + } else if (result === false) { + fieldErrors[field.name] = { + type: 'custom', + message: + field.validation?.message || `${field.label} is invalid`, + }; + } + } catch (error) { + fieldErrors[field.name] = { + type: 'custom', + message: + error instanceof Error + ? error.message + : 'Validation failed', + }; + } + } + } + + combinedErrors = { + ...combinedErrors, + ...fieldErrors, + } as any; + console.log('combinedErrors', combinedErrors); + return { + values: Object.keys(combinedErrors).length ? 
{} : data, + errors: combinedErrors, + } as any; + }, defaultValues, }); + useEffect(() => { + const dependencyMap: Record = {}; + + fields.forEach((field) => { + if (field.dependencies && field.dependencies.length > 0) { + field.dependencies.forEach((dep) => { + if (!dependencyMap[dep]) { + dependencyMap[dep] = []; + } + dependencyMap[dep].push(field.name); + }); + } + }); + + const subscriptions = Object.keys(dependencyMap).map((depField) => { + return form.watch((values: any, { name }) => { + if (name === depField && dependencyMap[depField]) { + dependencyMap[depField].forEach((dependentField) => { + form.trigger(dependentField as any); + }); + } + }); + }); + + return () => { + subscriptions.forEach((sub) => { + if (sub.unsubscribe) { + sub.unsubscribe(); + } + }); + }; + }, [fields, form]); + // Expose form methods via ref useImperativeHandle(ref, () => ({ submit: () => form.handleSubmit(onSubmit)(), diff --git a/web/src/components/ui/segmented.tsx b/web/src/components/ui/segmented.tsx index 8aadc3b21..3f9b0cc53 100644 --- a/web/src/components/ui/segmented.tsx +++ b/web/src/components/ui/segmented.tsx @@ -51,6 +51,7 @@ export interface SegmentedProps direction?: 'ltr' | 'rtl'; motionName?: string; activeClassName?: string; + itemClassName?: string; rounded?: keyof typeof segmentedVariants.round; sizeType?: keyof typeof segmentedVariants.size; buttonSize?: keyof typeof segmentedVariants.buttonSize; @@ -62,6 +63,7 @@ export function Segmented({ onChange, className, activeClassName, + itemClassName, rounded = 'default', sizeType = 'default', buttonSize = 'default', @@ -92,12 +94,13 @@ export function Segmented({
void; + visible?: boolean; + hideModal: () => void; + defaultValues?: FieldValues; + setDefaultValues?: (value: FieldValues) => void; +}) => { + const { + fields, + setFields, + visible, + hideModal, + defaultValues, + setDefaultValues, + } = props; + + const { handleSubmit: submitForm, loading } = useHandleForm(); + + const { handleCustomValidate, handleCustomSchema, handleRender } = + useObjectFields(); + + const formRef = useRef(null); + + const handleFieldUpdate = ( + fieldName: string, + updatedField: Partial, + ) => { + setFields((prevFields: any) => + prevFields.map((field: any) => + field.name === fieldName ? { ...field, ...updatedField } : field, + ), + ); + }; + + useEffect(() => { + const typeField = fields?.find((item) => item.name === 'type'); + + if (typeField) { + typeField.onChange = (value) => { + handleFieldUpdate('value', { + type: TypeMaps[value as keyof typeof TypeMaps], + render: handleRender(value), + customValidate: handleCustomValidate(value), + schema: handleCustomSchema(value), + }); + const values = formRef.current?.getValues(); + // setTimeout(() => { + switch (value) { + case TypesWithArray.Boolean: + setDefaultValues?.({ ...values, value: false }); + break; + case TypesWithArray.Number: + setDefaultValues?.({ ...values, value: 0 }); + break; + case TypesWithArray.Object: + setDefaultValues?.({ ...values, value: {} }); + break; + case TypesWithArray.ArrayString: + setDefaultValues?.({ ...values, value: [''] }); + break; + case TypesWithArray.ArrayNumber: + setDefaultValues?.({ ...values, value: [''] }); + break; + case TypesWithArray.ArrayBoolean: + setDefaultValues?.({ ...values, value: [false] }); + break; + case TypesWithArray.ArrayObject: + setDefaultValues?.({ ...values, value: [] }); + break; + default: + setDefaultValues?.({ ...values, value: '' }); + break; + } + // }, 0); + }; + } + }, [fields]); + + const handleSubmit = async (fieldValue: FieldValues) => { + await submitForm(fieldValue); + hideModal(); + }; + + return ( + + { 
+ console.log(data); + }} + defaultValues={defaultValues} + onFieldUpdate={handleFieldUpdate} + > +
+ { + hideModal?.(); + }} + /> + { + handleSubmit(values); + // console.log(values); + // console.log(nodes, edges); + // handleOk(values); + }} + /> +
+
+
+ ); +}; diff --git a/web/src/pages/agent/gobal-variable-sheet/contant.ts b/web/src/pages/agent/gobal-variable-sheet/constant.ts similarity index 72% rename from web/src/pages/agent/gobal-variable-sheet/contant.ts rename to web/src/pages/agent/gobal-variable-sheet/constant.ts index 2f3bd395f..fc668e330 100644 --- a/web/src/pages/agent/gobal-variable-sheet/contant.ts +++ b/web/src/pages/agent/gobal-variable-sheet/constant.ts @@ -13,14 +13,14 @@ export enum TypesWithArray { String = 'string', Number = 'number', Boolean = 'boolean', - // Object = 'object', - // ArrayString = 'array', - // ArrayNumber = 'array', - // ArrayBoolean = 'array', - // ArrayObject = 'array', + Object = 'object', + ArrayString = 'array', + ArrayNumber = 'array', + ArrayBoolean = 'array', + ArrayObject = 'array', } -export const GobalFormFields = [ +export const GlobalFormFields = [ { label: t('flow.name'), name: 'name', @@ -50,11 +50,11 @@ export const GobalFormFields = [ label: t('flow.description'), name: 'description', placeholder: t('flow.variableDescription'), - type: 'textarea', + type: FormFieldType.Textarea, }, ] as FormFieldConfig[]; -export const GobalVariableFormDefaultValues = { +export const GlobalVariableFormDefaultValues = { name: '', type: TypesWithArray.String, value: '', @@ -65,9 +65,9 @@ export const TypeMaps = { [TypesWithArray.String]: FormFieldType.Textarea, [TypesWithArray.Number]: FormFieldType.Number, [TypesWithArray.Boolean]: FormFieldType.Checkbox, - // [TypesWithArray.Object]: FormFieldType.Textarea, - // [TypesWithArray.ArrayString]: FormFieldType.Textarea, - // [TypesWithArray.ArrayNumber]: FormFieldType.Textarea, - // [TypesWithArray.ArrayBoolean]: FormFieldType.Textarea, - // [TypesWithArray.ArrayObject]: FormFieldType.Textarea, + [TypesWithArray.Object]: FormFieldType.Textarea, + [TypesWithArray.ArrayString]: FormFieldType.Textarea, + [TypesWithArray.ArrayNumber]: FormFieldType.Textarea, + [TypesWithArray.ArrayBoolean]: FormFieldType.Textarea, + 
[TypesWithArray.ArrayObject]: FormFieldType.Textarea, }; diff --git a/web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx b/web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx new file mode 100644 index 000000000..cb38012f3 --- /dev/null +++ b/web/src/pages/agent/gobal-variable-sheet/hooks/use-form.tsx @@ -0,0 +1,41 @@ +import { useFetchAgent } from '@/hooks/use-agent-request'; +import { GlobalVariableType } from '@/interfaces/database/agent'; +import { useCallback } from 'react'; +import { FieldValues } from 'react-hook-form'; +import { useSaveGraph } from '../../hooks/use-save-graph'; +import { TypesWithArray } from '../constant'; + +export const useHandleForm = () => { + const { data, refetch } = useFetchAgent(); + const { saveGraph, loading } = useSaveGraph(); + const handleObjectData = (value: any) => { + try { + return JSON.parse(value); + } catch (error) { + return value; + } + }; + const handleSubmit = useCallback(async (fieldValue: FieldValues) => { + const param = { + ...(data.dsl?.variables || {}), + [fieldValue.name]: { + ...fieldValue, + value: + fieldValue.type === TypesWithArray.Object || + fieldValue.type === TypesWithArray.ArrayObject + ? 
handleObjectData(fieldValue.value) + : fieldValue.value, + }, + } as Record; + + const res = await saveGraph(undefined, { + globalVariables: param, + }); + + if (res.code === 0) { + refetch(); + } + }, []); + + return { handleSubmit, loading }; +}; diff --git a/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx new file mode 100644 index 000000000..d8600d568 --- /dev/null +++ b/web/src/pages/agent/gobal-variable-sheet/hooks/use-object-fields.tsx @@ -0,0 +1,246 @@ +import { BlockButton, Button } from '@/components/ui/button'; +import { Input } from '@/components/ui/input'; +import { Segmented } from '@/components/ui/segmented'; +import { Editor } from '@monaco-editor/react'; +import { t } from 'i18next'; +import { Trash2, X } from 'lucide-react'; +import { useCallback } from 'react'; +import { FieldValues } from 'react-hook-form'; +import { z } from 'zod'; +import { TypesWithArray } from '../constant'; + +export const useObjectFields = () => { + const booleanRender = useCallback( + (field: FieldValues, className?: string) => { + const fieldValue = field.value ? true : false; + return ( + + ); + }, + [], + ); + + const objectRender = useCallback((field: FieldValues) => { + const fieldValue = + typeof field.value === 'object' + ? JSON.stringify(field.value, null, 2) + : JSON.stringify({}, null, 2); + console.log('object-render-field', field, fieldValue); + return ( + + ); + }, []); + + const objectValidate = useCallback((value: any) => { + try { + if (!JSON.parse(value)) { + throw new Error(t('knowledgeDetails.formatTypeError')); + } + return true; + } catch (e) { + throw new Error(t('knowledgeDetails.formatTypeError')); + } + }, []); + + const arrayStringRender = useCallback((field: FieldValues, type = 'text') => { + const values = Array.isArray(field.value) + ? field.value + : [type === 'number' ? 0 : '']; + return ( + <> + {values?.map((item: any, index: number) => ( +
+ { + const newValues = [...values]; + newValues[index] = e.target.value; + field.onChange(newValues); + }} + /> + +
+ ))} + { + field.onChange([...field.value, '']); + }} + > + {t('flow.add')} + + + ); + }, []); + + const arrayBooleanRender = useCallback( + (field: FieldValues) => { + // const values = field.value || [false]; + const values = Array.isArray(field.value) ? field.value : [false]; + return ( +
+ {values?.map((item: any, index: number) => ( +
+ {booleanRender( + { + value: item, + onChange: (value) => { + values[index] = !!value; + field.onChange(values); + }, + }, + 'bg-transparent', + )} + +
+ ))} + { + field.onChange([...field.value, false]); + }} + > + {t('flow.add')} + +
+ ); + }, + [booleanRender], + ); + + const arrayNumberRender = useCallback( + (field: FieldValues) => { + return arrayStringRender(field, 'number'); + }, + [arrayStringRender], + ); + + const arrayValidate = useCallback((value: any, type: string = 'string') => { + if (!Array.isArray(value) || !value.every((item) => typeof item === type)) { + throw new Error(t('flow.formatTypeError')); + } + return true; + }, []); + + const arrayStringValidate = useCallback( + (value: any) => { + return arrayValidate(value, 'string'); + }, + [arrayValidate], + ); + + const arrayNumberValidate = useCallback( + (value: any) => { + return arrayValidate(value, 'number'); + }, + [arrayValidate], + ); + + const arrayBooleanValidate = useCallback( + (value: any) => { + return arrayValidate(value, 'boolean'); + }, + [arrayValidate], + ); + + const handleRender = (value: TypesWithArray) => { + switch (value) { + case TypesWithArray.Boolean: + return booleanRender; + case TypesWithArray.Object: + case TypesWithArray.ArrayObject: + return objectRender; + case TypesWithArray.ArrayString: + return arrayStringRender; + case TypesWithArray.ArrayNumber: + return arrayNumberRender; + case TypesWithArray.ArrayBoolean: + return arrayBooleanRender; + default: + return undefined; + } + }; + const handleCustomValidate = (value: TypesWithArray) => { + switch (value) { + case TypesWithArray.Object: + case TypesWithArray.ArrayObject: + return objectValidate; + case TypesWithArray.ArrayString: + return arrayStringValidate; + case TypesWithArray.ArrayNumber: + return arrayNumberValidate; + case TypesWithArray.ArrayBoolean: + return arrayBooleanValidate; + default: + return undefined; + } + }; + const handleCustomSchema = (value: TypesWithArray) => { + switch (value) { + case TypesWithArray.ArrayString: + return z.array(z.string()); + case TypesWithArray.ArrayNumber: + return z.array(z.number()); + case TypesWithArray.ArrayBoolean: + return z.array(z.boolean()); + default: + return undefined; + } + }; + 
return { + objectRender, + objectValidate, + arrayStringRender, + arrayStringValidate, + arrayNumberRender, + booleanRender, + arrayBooleanRender, + arrayNumberValidate, + arrayBooleanValidate, + handleRender, + handleCustomValidate, + handleCustomSchema, + }; +}; diff --git a/web/src/pages/agent/gobal-variable-sheet/index.tsx b/web/src/pages/agent/gobal-variable-sheet/index.tsx index 454131638..51648b8d1 100644 --- a/web/src/pages/agent/gobal-variable-sheet/index.tsx +++ b/web/src/pages/agent/gobal-variable-sheet/index.tsx @@ -1,12 +1,6 @@ import { ConfirmDeleteDialog } from '@/components/confirm-delete-dialog'; -import { - DynamicForm, - DynamicFormRef, - FormFieldConfig, - FormFieldType, -} from '@/components/dynamic-form'; +import { FormFieldConfig } from '@/components/dynamic-form'; import { BlockButton, Button } from '@/components/ui/button'; -import { Modal } from '@/components/ui/modal/modal'; import { Sheet, SheetContent, @@ -19,117 +13,65 @@ import { GlobalVariableType } from '@/interfaces/database/agent'; import { cn } from '@/lib/utils'; import { t } from 'i18next'; import { Trash2 } from 'lucide-react'; -import { useEffect, useRef, useState } from 'react'; +import { useState } from 'react'; import { FieldValues } from 'react-hook-form'; import { useSaveGraph } from '../hooks/use-save-graph'; +import { AddVariableModal } from './component/add-variable-modal'; import { - GobalFormFields, - GobalVariableFormDefaultValues, + GlobalFormFields, + GlobalVariableFormDefaultValues, TypeMaps, TypesWithArray, -} from './contant'; +} from './constant'; +import { useObjectFields } from './hooks/use-object-fields'; -export type IGobalParamModalProps = { +export type IGlobalParamModalProps = { data: any; hideModal: (open: boolean) => void; }; -export const GobalParamSheet = (props: IGobalParamModalProps) => { +export const GlobalParamSheet = (props: IGlobalParamModalProps) => { const { hideModal } = props; const { data, refetch } = useFetchAgent(); - const [fields, 
setFields] = useState(GobalFormFields); const { visible, showModal, hideModal: hideAddModal } = useSetModalState(); + const [fields, setFields] = useState(GlobalFormFields); const [defaultValues, setDefaultValues] = useState( - GobalVariableFormDefaultValues, + GlobalVariableFormDefaultValues, ); - const formRef = useRef(null); + const { handleCustomValidate, handleCustomSchema, handleRender } = + useObjectFields(); + const { saveGraph } = useSaveGraph(); - const handleFieldUpdate = ( - fieldName: string, - updatedField: Partial, - ) => { - setFields((prevFields) => - prevFields.map((field) => - field.name === fieldName ? { ...field, ...updatedField } : field, - ), - ); - }; - - useEffect(() => { - const typefileld = fields.find((item) => item.name === 'type'); - - if (typefileld) { - typefileld.onChange = (value) => { - // setWatchType(value); - handleFieldUpdate('value', { - type: TypeMaps[value as keyof typeof TypeMaps], - }); - const values = formRef.current?.getValues(); - setTimeout(() => { - switch (value) { - case TypesWithArray.Boolean: - setDefaultValues({ ...values, value: false }); - break; - case TypesWithArray.Number: - setDefaultValues({ ...values, value: 0 }); - break; - default: - setDefaultValues({ ...values, value: '' }); - } - }, 0); - }; - } - }, [fields]); - - const { saveGraph, loading } = useSaveGraph(); - - const handleSubmit = async (value: FieldValues) => { - const param = { - ...(data.dsl?.variables || {}), - [value.name]: value, - } as Record; - - const res = await saveGraph(undefined, { - gobalVariables: param, - }); - - if (res.code === 0) { - refetch(); - } - hideAddModal(); - }; - - const handleDeleteGobalVariable = async (key: string) => { + const handleDeleteGlobalVariable = async (key: string) => { const param = { ...(data.dsl?.variables || {}), } as Record; delete param[key]; const res = await saveGraph(undefined, { - gobalVariables: param, + globalVariables: param, }); - console.log('delete gobal variable-->', res); if 
(res.code === 0) { refetch(); } }; - const handleEditGobalVariable = (item: FieldValues) => { - fields.forEach((field) => { - if (field.name === 'value') { - switch (item.type) { - // [TypesWithArray.String]: FormFieldType.Textarea, - // [TypesWithArray.Number]: FormFieldType.Number, - // [TypesWithArray.Boolean]: FormFieldType.Checkbox, - case TypesWithArray.Boolean: - field.type = FormFieldType.Checkbox; - break; - case TypesWithArray.Number: - field.type = FormFieldType.Number; - break; - default: - field.type = FormFieldType.Textarea; - } + const handleEditGlobalVariable = (item: FieldValues) => { + const newFields = fields.map((field) => { + let newField = field; + newField.render = undefined; + newField.schema = undefined; + newField.customValidate = undefined; + if (newField.name === 'value') { + newField = { + ...newField, + type: TypeMaps[item.type as keyof typeof TypeMaps], + render: handleRender(item.type), + customValidate: handleCustomValidate(item.type), + schema: handleCustomSchema(item.type), + }; } + return newField; }); + setFields(newFields); setDefaultValues(item); showModal(); }; @@ -149,8 +91,8 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
{ - setFields(GobalFormFields); - setDefaultValues(GobalVariableFormDefaultValues); + setFields(GlobalFormFields); + setDefaultValues(GlobalVariableFormDefaultValues); showModal(); }} > @@ -167,7 +109,7 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => { key={key} className="flex items-center gap-3 min-h-14 justify-between px-5 py-3 border border-border-default rounded-lg hover:bg-bg-card group" onClick={() => { - handleEditGobalVariable(item); + handleEditGlobalVariable(item); }} >
@@ -177,13 +119,23 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => { {item.type}
-
- {item.value} -
+ {![ + TypesWithArray.Object, + TypesWithArray.ArrayObject, + TypesWithArray.ArrayString, + TypesWithArray.ArrayNumber, + TypesWithArray.ArrayBoolean, + ].includes(item.type as TypesWithArray) && ( +
+ + {item.value} + +
+ )}
handleDeleteGobalVariable(key)} + onOk={() => handleDeleteGlobalVariable(key)} >
- - { - console.log(data); - }} - defaultValues={defaultValues} - onFieldUpdate={handleFieldUpdate} - > -
- { - hideAddModal?.(); - }} - /> - { - handleSubmit(values); - // console.log(values); - // console.log(nodes, edges); - // handleOk(values); - }} - /> -
-
-
+ ); diff --git a/web/src/pages/agent/hooks/use-build-dsl.ts b/web/src/pages/agent/hooks/use-build-dsl.ts index 1a8569636..47ec1c225 100644 --- a/web/src/pages/agent/hooks/use-build-dsl.ts +++ b/web/src/pages/agent/hooks/use-build-dsl.ts @@ -4,7 +4,7 @@ import { RAGFlowNodeType } from '@/interfaces/database/flow'; import { useCallback } from 'react'; import { Operator } from '../constant'; import useGraphStore from '../store'; -import { buildDslComponentsByGraph, buildDslGobalVariables } from '../utils'; +import { buildDslComponentsByGraph, buildDslGlobalVariables } from '../utils'; export const useBuildDslData = () => { const { data } = useFetchAgent(); @@ -13,7 +13,7 @@ export const useBuildDslData = () => { const buildDslData = useCallback( ( currentNodes?: RAGFlowNodeType[], - otherParam?: { gobalVariables: Record }, + otherParam?: { globalVariables: Record }, ) => { const nodesToProcess = currentNodes ?? nodes; @@ -41,13 +41,13 @@ export const useBuildDslData = () => { data.dsl.components, ); - const gobalVariables = buildDslGobalVariables( + const globalVariables = buildDslGlobalVariables( data.dsl, - otherParam?.gobalVariables, + otherParam?.globalVariables, ); return { ...data.dsl, - ...gobalVariables, + ...globalVariables, graph: { nodes: filteredNodes, edges: filteredEdges }, components: dslComponents, }; diff --git a/web/src/pages/agent/hooks/use-save-graph.ts b/web/src/pages/agent/hooks/use-save-graph.ts index e59b99193..500baf716 100644 --- a/web/src/pages/agent/hooks/use-save-graph.ts +++ b/web/src/pages/agent/hooks/use-save-graph.ts @@ -21,7 +21,7 @@ export const useSaveGraph = (showMessage: boolean = true) => { const saveGraph = useCallback( async ( currentNodes?: RAGFlowNodeType[], - otherParam?: { gobalVariables: Record }, + otherParam?: { globalVariables: Record }, ) => { return setAgent({ id, diff --git a/web/src/pages/agent/index.tsx b/web/src/pages/agent/index.tsx index 21ecb22e7..b0d2f6f15 100644 --- a/web/src/pages/agent/index.tsx +++ 
b/web/src/pages/agent/index.tsx @@ -39,7 +39,7 @@ import { useParams } from 'umi'; import AgentCanvas from './canvas'; import { DropdownProvider } from './canvas/context'; import { Operator } from './constant'; -import { GobalParamSheet } from './gobal-variable-sheet'; +import { GlobalParamSheet } from './gobal-variable-sheet'; import { useCancelCurrentDataflow } from './hooks/use-cancel-dataflow'; import { useHandleExportJsonFile } from './hooks/use-export-json'; import { useFetchDataOnMount } from './hooks/use-fetch-data'; @@ -126,9 +126,9 @@ export default function Agent() { } = useSetModalState(); const { - visible: gobalParamSheetVisible, - showModal: showGobalParamSheet, - hideModal: hideGobalParamSheet, + visible: globalParamSheetVisible, + showModal: showGlobalParamSheet, + hideModal: hideGlobalParamSheet, } = useSetModalState(); const { @@ -216,7 +216,7 @@ export default function Agent() { showGobalParamSheet()} + onClick={() => showGlobalParamSheet()} loading={loading} > {t('flow.conversationVariable')} @@ -314,11 +314,11 @@ export default function Agent() { loading={pipelineRunning} > )} - {gobalParamSheetVisible && ( - + hideModal={hideGlobalParamSheet} + > )} ); diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts index 487067ed8..3312b7236 100644 --- a/web/src/pages/agent/utils.ts +++ b/web/src/pages/agent/utils.ts @@ -348,30 +348,30 @@ export const buildDslComponentsByGraph = ( return components; }; -export const buildDslGobalVariables = ( +export const buildDslGlobalVariables = ( dsl: DSL, - gobalVariables?: Record, + globalVariables?: Record, ) => { - if (!gobalVariables) { + if (!globalVariables) { return { globals: dsl.globals, variables: dsl.variables || {} }; } - let gobalVariablesTemp: Record = {}; - let gobalSystem: Record = {}; + let globalVariablesTemp: Record = {}; + let globalSystem: Record = {}; Object.keys(dsl.globals)?.forEach((key) => { if (key.indexOf('sys') > -1) { - gobalSystem[key] = dsl.globals[key]; + 
globalSystem[key] = dsl.globals[key]; } }); - Object.keys(gobalVariables).forEach((key) => { - gobalVariablesTemp['env.' + key] = gobalVariables[key].value; + Object.keys(globalVariables).forEach((key) => { + globalVariablesTemp['env.' + key] = globalVariables[key].value; }); - const gobalVariablesResult = { - ...gobalSystem, - ...gobalVariablesTemp, + const globalVariablesResult = { + ...globalSystem, + ...globalVariablesTemp, }; - return { globals: gobalVariablesResult, variables: gobalVariables }; + return { globals: globalVariablesResult, variables: globalVariables }; }; export const receiveMessageError = (res: any) => diff --git a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx index c63309c50..c6d18af13 100644 --- a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx +++ b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx @@ -7,11 +7,14 @@ import { FormMessage, } from '@/components/ui/form'; import { Radio } from '@/components/ui/radio'; +import { Spin } from '@/components/ui/spin'; import { Switch } from '@/components/ui/switch'; import { useTranslate } from '@/hooks/common-hooks'; import { cn } from '@/lib/utils'; +import { useMemo, useState } from 'react'; import { useFormContext } from 'react-hook-form'; import { + useHandleKbEmbedding, useHasParsedDocument, useSelectChunkMethodList, useSelectEmbeddingModelOptions, @@ -62,11 +65,17 @@ export function ChunkMethodItem(props: IProps) { /> ); } -export function EmbeddingModelItem({ line = 1, isEdit = true }: IProps) { +export function EmbeddingModelItem({ line = 1, isEdit }: IProps) { const { t } = useTranslate('knowledgeConfiguration'); const form = useFormContext(); const embeddingModelOptions = useSelectEmbeddingModelOptions(); + const { handleChange } = useHandleKbEmbedding(); const disabled = useHasParsedDocument(isEdit); + const oldValue = useMemo(() => { + const embdStr = 
form.getValues('embd_id'); + return embdStr || ''; + }, [form]); + const [loading, setLoading] = useState(false); return ( <> - + + { + field.onChange(value); + if (isEdit && disabled) { + setLoading(true); + const res = await handleChange({ + embed_id: value, + callback: field.onChange, + }); + if (res.code !== 0) { + field.onChange(oldValue); + } + setLoading(false); + } + }} + value={field.value} + options={embeddingModelOptions} + placeholder={t('embeddingModelPlaceholder')} + triggerClassName="!bg-bg-base" + /> + diff --git a/web/src/pages/dataset/dataset-setting/general-form.tsx b/web/src/pages/dataset/dataset-setting/general-form.tsx index b4a7b9635..110c03a3e 100644 --- a/web/src/pages/dataset/dataset-setting/general-form.tsx +++ b/web/src/pages/dataset/dataset-setting/general-form.tsx @@ -88,7 +88,7 @@ export function GeneralForm() { }} /> - + diff --git a/web/src/pages/dataset/dataset-setting/hooks.ts b/web/src/pages/dataset/dataset-setting/hooks.ts index 605f91e4d..f9efe1d08 100644 --- a/web/src/pages/dataset/dataset-setting/hooks.ts +++ b/web/src/pages/dataset/dataset-setting/hooks.ts @@ -4,10 +4,12 @@ import { useSetModalState } from '@/hooks/common-hooks'; import { useSelectLlmOptionsByModelType } from '@/hooks/llm-hooks'; import { useFetchKnowledgeBaseConfiguration } from '@/hooks/use-knowledge-request'; import { useSelectParserList } from '@/hooks/user-setting-hooks'; +import kbService from '@/services/knowledge-service'; import { useIsFetching } from '@tanstack/react-query'; import { pick } from 'lodash'; import { useCallback, useEffect, useState } from 'react'; import { UseFormReturn } from 'react-hook-form'; +import { useParams, useSearchParams } from 'umi'; import { z } from 'zod'; import { formSchema } from './form-schema'; @@ -98,3 +100,22 @@ export const useRenameKnowledgeTag = () => { showTagRenameModal: handleShowTagRenameModal, }; }; + +export const useHandleKbEmbedding = () => { + const { id } = useParams(); + const [searchParams] = 
useSearchParams(); + const knowledgeBaseId = searchParams.get('id') || id; + const handleChange = useCallback( + async ({ embed_id }: { embed_id: string }) => { + const res = await kbService.checkEmbedding({ + kb_id: knowledgeBaseId, + embd_id: embed_id, + }); + return res.data; + }, + [knowledgeBaseId], + ); + return { + handleChange, + }; +}; diff --git a/web/src/services/knowledge-service.ts b/web/src/services/knowledge-service.ts index 350fa4e2a..01b8da127 100644 --- a/web/src/services/knowledge-service.ts +++ b/web/src/services/knowledge-service.ts @@ -47,6 +47,7 @@ const { traceGraphRag, runRaptor, traceRaptor, + check_embedding, } = api; const methods = { @@ -214,6 +215,11 @@ const methods = { url: api.pipelineRerun, method: 'post', }, + + checkEmbedding: { + url: check_embedding, + method: 'post', + }, }; const kbService = registerServer(methods, request); diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts index 0d97801ac..e0afdbeb3 100644 --- a/web/src/utils/api.ts +++ b/web/src/utils/api.ts @@ -49,6 +49,8 @@ export default { llm_tools: `${api_host}/plugin/llm_tools`, // knowledge base + + check_embedding: `${api_host}/kb/check_embedding`, kb_list: `${api_host}/kb/list`, create_kb: `${api_host}/kb/create`, update_kb: `${api_host}/kb/update`, From 5f59418ababc619aa61244dba6772dca424c507b Mon Sep 17 00:00:00 2001 From: redredrrred <1589289338@qq.com> Date: Fri, 14 Nov 2025 13:59:03 +0800 Subject: [PATCH 02/15] Remove leftover account and password from the code (#11248) Remove legacy accounts and passwords. ### What problem does this PR solve? 
Remove leftover account and password in agent/templates/sql_assistant.json ### Type of change - [x] Other (please describe): --- agent/templates/sql_assistant.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/templates/sql_assistant.json b/agent/templates/sql_assistant.json index 92804abc6..6e7140196 100644 --- a/agent/templates/sql_assistant.json +++ b/agent/templates/sql_assistant.json @@ -83,10 +83,10 @@ "value": [] } }, - "password": "20010812Yy!", + "password": "", "port": 3306, "sql": "{Agent:WickedGoatsDivide@content}", - "username": "13637682833@163.com" + "username": "" } }, "upstream": [ @@ -527,10 +527,10 @@ "value": [] } }, - "password": "20010812Yy!", + "password": "", "port": 3306, "sql": "{Agent:WickedGoatsDivide@content}", - "username": "13637682833@163.com" + "username": "" }, "label": "ExeSQL", "name": "ExeSQL" From e27ff8d3d42ce726941f8494a1a428ebe76587de Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Fri, 14 Nov 2025 13:59:54 +0800 Subject: [PATCH 03/15] Fix: rerank algorithm (#11266) ### What problem does this PR solve? Fix: rerank algorithm #11234 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/nlp/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index f8b3d513f..4dbd9945c 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -347,7 +347,7 @@ class Dealer: ## For rank feature(tag_fea) scores. 
rank_fea = self._rank_feature_scores(rank_feature, sres) - return tkweight * (np.array(tksim)+rank_fea) + vtweight * vtsim, tksim, vtsim + return tkweight * np.array(tksim) + vtweight * vtsim + rank_fea, tksim, vtsim def hybrid_similarity(self, ans_embd, ins_embd, ans, inst): return self.qryr.hybrid_similarity(ans_embd, From b5f2cf16bcad7b1f9f9f10ff11323352680d02ff Mon Sep 17 00:00:00 2001 From: Lynn Date: Fri, 14 Nov 2025 15:52:28 +0800 Subject: [PATCH 04/15] Fix: check task executor alive and display status (#11270) ### What problem does this PR solve? Correctly check task executor alive and display status. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- admin/client/admin_client.py | 9 ++++++--- api/utils/health_utils.py | 3 ++- rag/utils/redis_conn.py | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/admin/client/admin_client.py b/admin/client/admin_client.py index b52e67494..0d04cb3b2 100644 --- a/admin/client/admin_client.py +++ b/admin/client/admin_client.py @@ -393,7 +393,9 @@ class AdminCLI(Cmd): print(f"Can't access {self.host}, port: {self.port}") def _format_service_detail_table(self, data): - if not any([isinstance(v, list) for v in data.values()]): + if isinstance(data, list): + return data + if not all([isinstance(v, list) for v in data.values()]): # normal table return data # handle task_executor heartbeats map, for example {'name': [{'done': 2, 'now': timestamp1}, {'done': 3, 'now': timestamp2}] @@ -404,7 +406,7 @@ class AdminCLI(Cmd): task_executor_list.append({ "task_executor_name": k, **heartbeats[0], - }) + } if heartbeats else {"task_executor_name": k}) return task_executor_list def _print_table_simple(self, data): @@ -415,7 +417,8 @@ class AdminCLI(Cmd): # handle single row data data = [data] - columns = list(data[0].keys()) + columns = list(set().union(*(d.keys() for d in data))) + columns.sort() col_widths = {} def get_string_width(text): diff --git a/api/utils/health_utils.py 
b/api/utils/health_utils.py index 88e5aaebb..0a7ab6e7a 100644 --- a/api/utils/health_utils.py +++ b/api/utils/health_utils.py @@ -173,7 +173,8 @@ def check_task_executor_alive(): heartbeats = [json.loads(heartbeat) for heartbeat in heartbeats] task_executor_heartbeats[task_executor_id] = heartbeats if task_executor_heartbeats: - return {"status": "alive", "message": task_executor_heartbeats} + status = "alive" if any(task_executor_heartbeats.values()) else "timeout" + return {"status": status, "message": task_executor_heartbeats} else: return {"status": "timeout", "message": "Not found any task executor."} except Exception as e: diff --git a/rag/utils/redis_conn.py b/rag/utils/redis_conn.py index 58b0fe15b..a8bc43b57 100644 --- a/rag/utils/redis_conn.py +++ b/rag/utils/redis_conn.py @@ -110,7 +110,7 @@ class RedisDB: info = self.REDIS.info() return { 'redis_version': info["redis_version"], - 'server_mode': info["server_mode"], + 'server_mode': info["server_mode"] if "server_mode" in info else info.get("redis_mode", ""), 'used_memory': info["used_memory_human"], 'total_system_memory': info["total_system_memory_human"], 'mem_fragmentation_ratio': info["mem_fragmentation_ratio"], From 12db62b9c736c8b9efba3ef58fa7151a0c50099b Mon Sep 17 00:00:00 2001 From: Stephen Hu <812791840@qq.com> Date: Fri, 14 Nov 2025 16:32:35 +0800 Subject: [PATCH 05/15] Refactor: improve mineru_parser get property logic (#11268) ### What problem does this PR solve? 
improve mineru_parser get property logic ### Type of change - [x] Refactoring --- deepdoc/parser/mineru_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py index 3d4c9f149..bb663de0d 100644 --- a/deepdoc/parser/mineru_parser.py +++ b/deepdoc/parser/mineru_parser.py @@ -434,7 +434,7 @@ class MinerUParser(RAGFlowPdfParser): if not section.strip(): section = "FAILED TO PARSE TABLE" case MinerUContentType.IMAGE: - section = "".join(output["image_caption"]) + "\n" + "".join(output["image_footnote"]) + section = "".join(output.get("image_caption", [])) + "\n" + "".join(output.get("image_footnote", [])) case MinerUContentType.EQUATION: section = output["text"] case MinerUContentType.CODE: From db4fd19c8269a64f8d213a64c37f41e5325f22cd Mon Sep 17 00:00:00 2001 From: buua436 <66937541+buua436@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:33:20 +0800 Subject: [PATCH 06/15] Feat:new component list operations (#11276) ### What problem does this PR solve? 
issue: https://github.com/infiniflow/ragflow/issues/10427 change: new component list operations ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- agent/component/list_operations.py | 149 ++++++++++++++++++ web/src/constants/agent.tsx | 1 + web/src/locales/en.ts | 15 ++ web/src/locales/zh.ts | 15 ++ web/src/pages/agent/canvas/index.tsx | 2 + .../node/dropdown/accordion-operators.tsx | 1 + .../canvas/node/list-operations-node.tsx | 22 +++ web/src/pages/agent/constant/index.tsx | 31 ++++ .../agent/form-sheet/form-config-map.tsx | 4 + .../agent/form/list-operations-form/index.tsx | 140 ++++++++++++++++ web/src/pages/agent/hooks/use-add-node.ts | 2 + web/src/pages/agent/operator-icon.tsx | 3 +- web/src/pages/agent/utils.ts | 1 - 13 files changed, 384 insertions(+), 2 deletions(-) create mode 100644 agent/component/list_operations.py create mode 100644 web/src/pages/agent/canvas/node/list-operations-node.tsx create mode 100644 web/src/pages/agent/form/list-operations-form/index.tsx diff --git a/agent/component/list_operations.py b/agent/component/list_operations.py new file mode 100644 index 000000000..c29d79ea6 --- /dev/null +++ b/agent/component/list_operations.py @@ -0,0 +1,149 @@ +from abc import ABC +import os +from agent.component.base import ComponentBase, ComponentParamBase +from api.utils.api_utils import timeout + +class ListOperationsParam(ComponentParamBase): + """ + Define the List Operations component parameters. + """ + def __init__(self): + super().__init__() + self.query = "" + self.operations = "topN" + self.n=0 + self.sort_method = "asc" + self.filter = { + "operator": "=", + "value": "" + } + self.outputs = { + "result": { + "value": [], + "type": "Array of ?" + }, + "first": { + "value": "", + "type": "?" + }, + "last": { + "value": "", + "type": "?" 
+ } + } + + def check(self): + self.check_empty(self.query, "query") + self.check_valid_value(self.operations, "Support operations", ["topN","head","tail","filter","sort","drop_duplicates"]) + + def get_input_form(self) -> dict[str, dict]: + return {} + + +class ListOperations(ComponentBase,ABC): + component_name = "ListOperations" + + @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) + def _invoke(self, **kwargs): + self.input_objects=[] + inputs = getattr(self._param, "query", None) + self.inputs=self._canvas.get_variable_value(inputs) + self.set_input_value(inputs, self.inputs) + if self._param.operations == "topN": + self._topN() + elif self._param.operations == "head": + self._head() + elif self._param.operations == "tail": + self._tail() + elif self._param.operations == "filter": + self._filter() + elif self._param.operations == "sort": + self._sort() + elif self._param.operations == "drop_duplicates": + self._drop_duplicates() + + + def _coerce_n(self): + try: + return int(getattr(self._param, "n", 0)) + except Exception: + return 0 + + def _set_outputs(self, outputs): + self._param.outputs["result"]["value"] = outputs + self._param.outputs["first"]["value"] = outputs[0] if outputs else None + self._param.outputs["last"]["value"] = outputs[-1] if outputs else None + + def _topN(self): + n = self._coerce_n() + if n < 1: + outputs = [] + else: + n = min(n, len(self.inputs)) + outputs = self.inputs[:n] + self._set_outputs(outputs) + + def _head(self): + n = self._coerce_n() + if 1 <= n <= len(self.inputs): + outputs = [self.inputs[n - 1]] + else: + outputs = [] + self._set_outputs(outputs) + + def _tail(self): + n = self._coerce_n() + if 1 <= n <= len(self.inputs): + outputs = [self.inputs[-n]] + else: + outputs = [] + self._set_outputs(outputs) + + def _filter(self): + self._set_outputs([i for i in self.inputs if self._eval(self._norm(i),self._param.filter["operator"],self._param.filter["value"])]) + + def _norm(self,v): + s = "" if v is None else 
str(v) + return s + + def _eval(self, v, operator, value): + if operator == "=": + return v == value + elif operator == "≠": + return v != value + elif operator == "contains": + return value in v + elif operator == "start with": + return v.startswith(value) + elif operator == "end with": + return v.endswith(value) + else: + return False + + def _sort(self): + if self._param.sort_method == "asc": + self._set_outputs(sorted(self.inputs)) + elif self._param.sort_method == "desc": + self._set_outputs(sorted(self.inputs, reverse=True)) + + def _drop_duplicates(self): + seen = set() + outs = [] + for item in self.inputs: + k = self._hashable(item) + if k in seen: + continue + seen.add(k) + outs.append(item) + self._set_outputs(outs) + + def _hashable(self,x): + if isinstance(x, dict): + return tuple(sorted((k, self._hashable(v)) for k, v in x.items())) + if isinstance(x, (list, tuple)): + return tuple(self._hashable(v) for v in x) + if isinstance(x, set): + return tuple(sorted(self._hashable(v) for v in x)) + return x + def thoughts(self) -> str: + return "ListOperation in progress" diff --git a/web/src/constants/agent.tsx b/web/src/constants/agent.tsx index 6ee8ab516..3a8411ce3 100644 --- a/web/src/constants/agent.tsx +++ b/web/src/constants/agent.tsx @@ -109,6 +109,7 @@ export enum Operator { SearXNG = 'SearXNG', Placeholder = 'Placeholder', DataOperations = 'DataOperations', + ListOperations = 'ListOperations', VariableAssigner = 'VariableAssigner', VariableAggregator = 'VariableAggregator', File = 'File', // pipeline diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 9a0569ab5..b9f374f7c 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1591,6 +1591,8 @@ This delimiter is used to split the input text into several text pieces echo of codeDescription: 'It allows developers to write custom Python logic.', dataOperations: 'Data operations', dataOperationsDescription: 'Perform various operations on a Data object.', + listOperations: 'List 
operations', + listOperationsDescription: 'Perform operations on a list.', variableAssigner: 'Variable assigner', variableAssignerDescription: 'This component performs operations on Data objects, including extracting, filtering, and editing keys and values in the Data.', @@ -1806,6 +1808,19 @@ Important structured information may include: names, dates, locations, events, k removeKeys: 'Remove keys', renameKeys: 'Rename keys', }, + ListOperationsOptions: { + topN: 'Top N', + head: 'Head', + tail: 'Tail', + sort: 'Sort', + filter: 'Filter', + dropDuplicates: 'Drop duplicates', + }, + sortMethod: 'Sort method', + SortMethodOptions: { + asc: 'Ascending', + desc: 'Descending', + }, }, llmTools: { bad_calculator: { diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index c065986f2..ce21c5a30 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1508,6 +1508,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 codeDescription: '它允许开发人员编写自定义 Python 逻辑。', dataOperations: '数据操作', dataOperationsDescription: '对数据对象执行各种操作。', + listOperations: '列表操作', + listOperationsDescription: '对列表对象执行各种操作。', variableAssigner: '变量赋值器', variableAssignerDescription: '此组件对数据对象执行操作,包括提取、筛选和编辑数据中的键和值。', @@ -1679,6 +1681,19 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`, removeKeys: '删除键', renameKeys: '重命名键', }, + ListOperationsOptions: { + topN: '取前N项', + head: '取前第N项', + tail: '取后第N项', + sort: '排序', + filter: '筛选', + dropDuplicates: '去重', + }, + sortMethod: '排序方式', + SortMethodOptions: { + asc: '升序', + desc: '降序', + }, }, footer: { profile: 'All rights reserved @ React', diff --git a/web/src/pages/agent/canvas/index.tsx b/web/src/pages/agent/canvas/index.tsx index 5f78e8185..f2fc983e2 100644 --- a/web/src/pages/agent/canvas/index.tsx +++ b/web/src/pages/agent/canvas/index.tsx @@ -61,6 +61,7 @@ import { FileNode } from './node/file-node'; import { InvokeNode } from './node/invoke-node'; import { IterationNode, IterationStartNode } from './node/iteration-node'; import { KeywordNode } 
from './node/keyword-node'; +import { ListOperationsNode } from './node/list-operations-node'; import { MessageNode } from './node/message-node'; import NoteNode from './node/note-node'; import ParserNode from './node/parser-node'; @@ -101,6 +102,7 @@ export const nodeTypes: NodeTypes = { splitterNode: SplitterNode, contextNode: ExtractorNode, dataOperationsNode: DataOperationsNode, + listOperationsNode: ListOperationsNode, variableAssignerNode: VariableAssignerNode, variableAggregatorNode: VariableAggregatorNode, }; diff --git a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx index 232ab78ff..8fd96f55f 100644 --- a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx +++ b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx @@ -79,6 +79,7 @@ export function AccordionOperators({ Operator.Code, Operator.StringTransform, Operator.DataOperations, + Operator.ListOperations, // Operator.VariableAssigner, Operator.VariableAggregator, ]} diff --git a/web/src/pages/agent/canvas/node/list-operations-node.tsx b/web/src/pages/agent/canvas/node/list-operations-node.tsx new file mode 100644 index 000000000..5b2778c92 --- /dev/null +++ b/web/src/pages/agent/canvas/node/list-operations-node.tsx @@ -0,0 +1,22 @@ +import { BaseNode } from '@/interfaces/database/agent'; +import { NodeProps } from '@xyflow/react'; +import { camelCase } from 'lodash'; +import { useTranslation } from 'react-i18next'; +import { RagNode } from '.'; +import { ListOperationsFormSchemaType } from '../../form/list-operations-form'; +import { LabelCard } from './card'; + +export function ListOperationsNode({ + ...props +}: NodeProps>) { + const { data } = props; + const { t } = useTranslation(); + + return ( + + + {t(`flow.ListOperationsOptions.${camelCase(data.form?.operations)}`)} + + + ); +} diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index 
45341abf4..7aad5e4a3 100644 --- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -595,6 +595,35 @@ export const initialDataOperationsValues = { }, }, }; +export enum SortMethod { + Asc = 'asc', + Desc = 'desc', +} + +export enum ListOperations { + TopN = 'topN', + Head = 'head', + Tail = 'tail', + Filter = 'filter', + Sort = 'sort', + DropDuplicates = 'drop_duplicates', +} + +export const initialListOperationsValues = { + query: '', + operations: ListOperations.TopN, + outputs: { + result: { + type: 'Array', + }, + first: { + type: '?', + }, + last: { + type: '?', + }, + }, +}; export const initialVariableAssignerValues = {}; @@ -673,6 +702,7 @@ export const RestrictedUpstreamMap = { [Operator.Tool]: [Operator.Begin], [Operator.Placeholder]: [Operator.Begin], [Operator.DataOperations]: [Operator.Begin], + [Operator.ListOperations]: [Operator.Begin], [Operator.Parser]: [Operator.Begin], // pipeline [Operator.Splitter]: [Operator.Begin], [Operator.HierarchicalMerger]: [Operator.Begin], @@ -729,6 +759,7 @@ export const NodeMap = { [Operator.HierarchicalMerger]: 'splitterNode', [Operator.Extractor]: 'contextNode', [Operator.DataOperations]: 'dataOperationsNode', + [Operator.ListOperations]: 'listOperationsNode', [Operator.VariableAssigner]: 'variableAssignerNode', [Operator.VariableAggregator]: 'variableAggregatorNode', }; diff --git a/web/src/pages/agent/form-sheet/form-config-map.tsx b/web/src/pages/agent/form-sheet/form-config-map.tsx index c291e4e05..37ab4cf2f 100644 --- a/web/src/pages/agent/form-sheet/form-config-map.tsx +++ b/web/src/pages/agent/form-sheet/form-config-map.tsx @@ -21,6 +21,7 @@ import IterationForm from '../form/iteration-form'; import IterationStartForm from '../form/iteration-start-from'; import Jin10Form from '../form/jin10-form'; import KeywordExtractForm from '../form/keyword-extract-form'; +import ListOperationsForm from '../form/list-operations-form'; import MessageForm from '../form/message-form'; 
import ParserForm from '../form/parser-form'; import PubMedForm from '../form/pubmed-form'; @@ -184,6 +185,9 @@ export const FormConfigMap = { [Operator.DataOperations]: { component: DataOperationsForm, }, + [Operator.ListOperations]: { + component: ListOperationsForm, + }, [Operator.VariableAssigner]: { component: VariableAssignerForm, }, diff --git a/web/src/pages/agent/form/list-operations-form/index.tsx b/web/src/pages/agent/form/list-operations-form/index.tsx new file mode 100644 index 000000000..5803fe055 --- /dev/null +++ b/web/src/pages/agent/form/list-operations-form/index.tsx @@ -0,0 +1,140 @@ +import NumberInput from '@/components/originui/number-input'; +import { SelectWithSearch } from '@/components/originui/select-with-search'; +import { RAGFlowFormItem } from '@/components/ragflow-form'; +import { + Form, + FormControl, + FormField, + FormItem, + FormLabel, + FormMessage, +} from '@/components/ui/form'; +import { Separator } from '@/components/ui/separator'; +import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options'; +import { buildOptions } from '@/utils/form'; +import { zodResolver } from '@hookform/resolvers/zod'; +import { memo } from 'react'; +import { useForm, useWatch } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; +import { z } from 'zod'; +import { + DataOperationsOperatorOptions, + JsonSchemaDataType, + ListOperations, + SortMethod, + initialListOperationsValues, +} from '../../constant'; +import { useFormValues } from '../../hooks/use-form-values'; +import { useWatchFormChange } from '../../hooks/use-watch-form-change'; +import { INextOperatorForm } from '../../interface'; +import { buildOutputList } from '../../utils/build-output-list'; +import { FormWrapper } from '../components/form-wrapper'; +import { Output, OutputSchema } from '../components/output'; +import { PromptEditor } from '../components/prompt-editor'; +import { QueryVariable } from 
'../components/query-variable'; + +export const RetrievalPartialSchema = { + query: z.string(), + operations: z.string(), + n: z.number().int().min(0).optional(), + sort_method: z.string().optional(), + filter: z + .object({ + value: z.string().optional(), + operator: z.string().optional(), + }) + .optional(), + ...OutputSchema, +}; + +export const FormSchema = z.object(RetrievalPartialSchema); + +export type ListOperationsFormSchemaType = z.infer; + +const outputList = buildOutputList(initialListOperationsValues.outputs); + +function ListOperationsForm({ node }: INextOperatorForm) { + const { t } = useTranslation(); + + const defaultValues = useFormValues(initialListOperationsValues, node); + + const form = useForm({ + defaultValues: defaultValues, + mode: 'onChange', + resolver: zodResolver(FormSchema), + shouldUnregister: true, + }); + + const operations = useWatch({ control: form.control, name: 'operations' }); + + const ListOperationsOptions = buildOptions( + ListOperations, + t, + `flow.ListOperationsOptions`, + true, + ); + const SortMethodOptions = buildOptions( + SortMethod, + t, + `flow.SortMethodOptions`, + true, + ); + const operatorOptions = useBuildSwitchOperatorOptions( + DataOperationsOperatorOptions, + ); + useWatchFormChange(node?.id, form, true); + + return ( +
+ + + + + + + {[ + ListOperations.TopN, + ListOperations.Head, + ListOperations.Tail, + ].includes(operations as ListOperations) && ( + ( + + {t('flowNum')} + + + + + + )} + /> + )} + {[ListOperations.Sort].includes(operations as ListOperations) && ( + + + + )} + {[ListOperations.Filter].includes(operations as ListOperations) && ( +
+ + + + + + + +
+ )} + +
+
+ ); +} + +export default memo(ListOperationsForm); diff --git a/web/src/pages/agent/hooks/use-add-node.ts b/web/src/pages/agent/hooks/use-add-node.ts index ed092a01b..44091f1b1 100644 --- a/web/src/pages/agent/hooks/use-add-node.ts +++ b/web/src/pages/agent/hooks/use-add-node.ts @@ -31,6 +31,7 @@ import { initialIterationValues, initialJin10Values, initialKeywordExtractValues, + initialListOperationsValues, initialMessageValues, initialNoteValues, initialParserValues, @@ -129,6 +130,7 @@ export const useInitializeOperatorParams = () => { prompts: t('flow.prompts.user.summary'), }, [Operator.DataOperations]: initialDataOperationsValues, + [Operator.ListOperations]: initialListOperationsValues, [Operator.VariableAssigner]: initialVariableAssignerValues, [Operator.VariableAggregator]: initialVariableAggregatorValues, }; diff --git a/web/src/pages/agent/operator-icon.tsx b/web/src/pages/agent/operator-icon.tsx index a7ece8ead..44fe9d01a 100644 --- a/web/src/pages/agent/operator-icon.tsx +++ b/web/src/pages/agent/operator-icon.tsx @@ -14,7 +14,7 @@ import { ReactComponent as YahooFinanceIcon } from '@/assets/svg/yahoo-finance.s import { IconFont } from '@/components/icon-font'; import { cn } from '@/lib/utils'; -import { Equal, FileCode, HousePlus, Variable } from 'lucide-react'; +import { Columns3, Equal, FileCode, HousePlus, Variable } from 'lucide-react'; import { Operator } from './constant'; interface IProps { @@ -57,6 +57,7 @@ export const SVGIconMap = { }; export const LucideIconMap = { [Operator.DataOperations]: FileCode, + [Operator.ListOperations]: Columns3, [Operator.VariableAssigner]: Equal, [Operator.VariableAggregator]: Variable, }; diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts index 3312b7236..a7d4248ff 100644 --- a/web/src/pages/agent/utils.ts +++ b/web/src/pages/agent/utils.ts @@ -328,7 +328,6 @@ export const buildDslComponentsByGraph = ( case Operator.DataOperations: params = transformDataOperationsParams(params); break; - 
default: break; } From 996b5fe14ec40ac56deb4021111341f941862581 Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Fri, 14 Nov 2025 19:50:01 +0800 Subject: [PATCH 07/15] Fix: Added the ability to download files in the agent message reply function. (#11281) ### What problem does this PR solve? Fix: Added the ability to download files in the agent message reply function. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- .../components/next-message-item/index.tsx | 30 +++++++++++++- web/src/hooks/use-send-message.ts | 7 +++- web/src/interfaces/database/chat.ts | 3 ++ web/src/locales/en.ts | 2 + web/src/locales/zh.ts | 2 + .../agent/chat/use-send-agent-message.ts | 8 +++- web/src/pages/agent/constant/index.tsx | 8 ++++ .../pages/agent/form/message-form/index.tsx | 41 ++++++++++++++++++- .../agent/form/message-form/use-values.ts | 3 +- web/src/services/file-manager-service.ts | 7 ++++ web/src/utils/api.ts | 2 + 11 files changed, 108 insertions(+), 5 deletions(-) diff --git a/web/src/components/next-message-item/index.tsx b/web/src/components/next-message-item/index.tsx index 5dd6cdf60..706553b67 100644 --- a/web/src/components/next-message-item/index.tsx +++ b/web/src/components/next-message-item/index.tsx @@ -18,8 +18,10 @@ import { cn } from '@/lib/utils'; import { AgentChatContext } from '@/pages/agent/context'; import { WorkFlowTimeline } from '@/pages/agent/log-sheet/workflow-timeline'; import { IMessage } from '@/pages/chat/interface'; +import { downloadFile } from '@/services/file-manager-service'; +import { downloadFileFromBlob } from '@/utils/file-util'; import { isEmpty } from 'lodash'; -import { Atom, ChevronDown, ChevronUp } from 'lucide-react'; +import { Atom, ChevronDown, ChevronUp, Download } from 'lucide-react'; import MarkdownContent from '../next-markdown-content'; import { RAGFlowAvatar } from '../ragflow-avatar'; import { useTheme } from '../theme-provider'; @@ -245,6 +247,32 @@ function MessageItem({ {isUser 
&& ( )} + {isAssistant && item.attachment && item.attachment.doc_id && ( +
+ +
+ )} diff --git a/web/src/hooks/use-send-message.ts b/web/src/hooks/use-send-message.ts index 8d602f2e0..e956217f3 100644 --- a/web/src/hooks/use-send-message.ts +++ b/web/src/hooks/use-send-message.ts @@ -44,9 +44,14 @@ export interface IInputData { inputs: Record; tips: string; } - +export interface IAttachment { + doc_id: string; + format: string; + file_name: string; +} export interface IMessageData { content: string; + outputs: any; start_to_think?: boolean; end_to_think?: boolean; } diff --git a/web/src/interfaces/database/chat.ts b/web/src/interfaces/database/chat.ts index 62bcb4696..eb6eebe89 100644 --- a/web/src/interfaces/database/chat.ts +++ b/web/src/interfaces/database/chat.ts @@ -1,4 +1,5 @@ import { MessageType } from '@/constants/chat'; +import { IAttachment } from '@/hooks/use-send-message'; export interface PromptConfig { empty_response: string; @@ -97,6 +98,7 @@ export interface Message { data?: any; files?: File[]; chatBoxId?: string; + attachment?: IAttachment; } export interface IReferenceChunk { @@ -126,6 +128,7 @@ export interface IReferenceObject { export interface IAnswer { answer: string; + attachment?: IAttachment; reference?: IReference; conversationId?: string; prompt?: string; diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index b9f374f7c..e2035a378 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1009,6 +1009,8 @@ Example: general/v2/`, pleaseUploadAtLeastOneFile: 'Please upload at least one file', }, flow: { + downloadFileTypeTip: 'The file type to download', + downloadFileType: 'Download file type', formatTypeError: 'Format or type error', variableNameMessage: 'Variable name can only contain letters and underscores', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index ce21c5a30..301719117 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -956,6 +956,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 pleaseUploadAtLeastOneFile: '请上传至少一个文件', }, flow: { + 
downloadFileTypeTip: '文件下载的类型', + downloadFileType: '文件类型', formatTypeError: '格式或类型错误', variableNameMessage: '名称只能包含字母和下划线', variableDescription: '变量的描述', diff --git a/web/src/pages/agent/chat/use-send-agent-message.ts b/web/src/pages/agent/chat/use-send-agent-message.ts index a0460fd71..5fc49d4ce 100644 --- a/web/src/pages/agent/chat/use-send-agent-message.ts +++ b/web/src/pages/agent/chat/use-send-agent-message.ts @@ -5,6 +5,7 @@ import { useSelectDerivedMessages, } from '@/hooks/logic-hooks'; import { + IAttachment, IEventList, IInputEvent, IMessageEndData, @@ -75,9 +76,13 @@ export function findMessageFromList(eventList: IEventList) { nextContent += ''; } + const workflowFinished = eventList.find( + (x) => x.event === MessageEventType.WorkflowFinished, + ) as IMessageEvent; return { id: eventList[0]?.message_id, content: nextContent, + attachment: workflowFinished?.data?.outputs?.attachment || {}, }; } @@ -388,12 +393,13 @@ export const useSendAgentMessage = ({ }, [sendMessageInTaskMode]); useEffect(() => { - const { content, id } = findMessageFromList(answerList); + const { content, id, attachment } = findMessageFromList(answerList); const inputAnswer = findInputFromList(answerList); const answer = content || getLatestError(answerList); if (answerList.length > 0) { addNewestOneAnswer({ answer: answer ?? 
'', + attachment: attachment as IAttachment, id: id, ...inputAnswer, }); diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index 7aad5e4a3..3a161d87d 100644 --- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -417,6 +417,7 @@ export const initialIterationValues = { items_ref: '', outputs: {}, }; + export const initialIterationStartValues = { outputs: { item: { @@ -845,3 +846,10 @@ export enum JsonSchemaDataType { Array = 'array', Object = 'object', } + +export enum ExportFileType { + PDF = 'pdf', + HTML = 'html', + Markdown = 'md', + DOCX = 'docx', +} diff --git a/web/src/pages/agent/form/message-form/index.tsx b/web/src/pages/agent/form/message-form/index.tsx index e93735ee7..31b52659e 100644 --- a/web/src/pages/agent/form/message-form/index.tsx +++ b/web/src/pages/agent/form/message-form/index.tsx @@ -8,12 +8,14 @@ import { FormLabel, FormMessage, } from '@/components/ui/form'; +import { RAGFlowSelect } from '@/components/ui/select'; import { zodResolver } from '@hookform/resolvers/zod'; import { X } from 'lucide-react'; import { memo } from 'react'; import { useFieldArray, useForm } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { z } from 'zod'; +import { ExportFileType } from '../../constant'; import { INextOperatorForm } from '../../interface'; import { FormWrapper } from '../components/form-wrapper'; import { PromptEditor } from '../components/prompt-editor'; @@ -33,10 +35,14 @@ function MessageForm({ node }: INextOperatorForm) { }), ) .optional(), + output_format: z.string().optional(), }); const form = useForm({ - defaultValues: values, + defaultValues: { + ...values, + output_format: values.output_format, + }, resolver: zodResolver(FormSchema), }); @@ -50,6 +56,39 @@ function MessageForm({ node }: INextOperatorForm) { return (
+ + + + {t('flow.downloadFileType')} + + ( + + + { + return { + value: + ExportFileType[ + key as keyof typeof ExportFileType + ], + label: key, + }; + }, + )} + {...field} + onValueChange={field.onChange} + placeholder={t('flow.messagePlaceholder')} + > + + + )} + /> + + {t('flow.msg')} diff --git a/web/src/pages/agent/form/message-form/use-values.ts b/web/src/pages/agent/form/message-form/use-values.ts index 6a90881be..0cece91fc 100644 --- a/web/src/pages/agent/form/message-form/use-values.ts +++ b/web/src/pages/agent/form/message-form/use-values.ts @@ -1,7 +1,7 @@ import { RAGFlowNodeType } from '@/interfaces/database/flow'; import { isEmpty } from 'lodash'; import { useMemo } from 'react'; -import { initialMessageValues } from '../../constant'; +import { ExportFileType, initialMessageValues } from '../../constant'; import { convertToObjectArray } from '../../utils'; export function useValues(node?: RAGFlowNodeType) { @@ -15,6 +15,7 @@ export function useValues(node?: RAGFlowNodeType) { return { ...formData, content: convertToObjectArray(formData.content), + output_format: formData.output_format || ExportFileType.PDF, }; }, [node]); diff --git a/web/src/services/file-manager-service.ts b/web/src/services/file-manager-service.ts index 8342117c9..8c5eb6c4e 100644 --- a/web/src/services/file-manager-service.ts +++ b/web/src/services/file-manager-service.ts @@ -13,6 +13,7 @@ const { get_document_file, getFile, moveFile, + get_document_file_download, } = api; const methods = { @@ -65,4 +66,10 @@ const fileManagerService = registerServer( request, ); +export const downloadFile = (data: { docId: string; ext: string }) => { + return request.get(get_document_file_download(data.docId), { + params: { ext: data.ext }, + responseType: 'blob', + }); +}; export default fileManagerService; diff --git a/web/src/utils/api.ts b/web/src/utils/api.ts index e0afdbeb3..c4ce8205f 100644 --- a/web/src/utils/api.ts +++ b/web/src/utils/api.ts @@ -100,6 +100,8 @@ export default { 
document_change_parser: `${api_host}/document/change_parser`, document_thumbnails: `${api_host}/document/thumbnails`, get_document_file: `${api_host}/document/get`, + get_document_file_download: (docId: string) => + `${api_host}/document/download/${docId}`, document_upload: `${api_host}/document/upload`, web_crawl: `${api_host}/document/web_crawl`, document_infos: `${api_host}/document/infos`, From cd55f6c1b822d84e23a2199ae5f71eac5671d736 Mon Sep 17 00:00:00 2001 From: buua436 <66937541+buua436@users.noreply.github.com> Date: Fri, 14 Nov 2025 19:50:29 +0800 Subject: [PATCH 08/15] Fix:ListOperations does not support sorting arrays of objects. (#11278) ### What problem does this PR solve? pr: #11276 change: ListOperations does not support sorting arrays of objects. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- agent/component/list_operations.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/agent/component/list_operations.py b/agent/component/list_operations.py index c29d79ea6..9ae8c2e04 100644 --- a/agent/component/list_operations.py +++ b/agent/component/list_operations.py @@ -121,10 +121,26 @@ class ListOperations(ComponentBase,ABC): return False def _sort(self): - if self._param.sort_method == "asc": - self._set_outputs(sorted(self.inputs)) - elif self._param.sort_method == "desc": - self._set_outputs(sorted(self.inputs, reverse=True)) + items = self.inputs or [] + method = getattr(self._param, "sort_method", "asc") or "asc" + reverse = method == "desc" + + if not items: + self._set_outputs([]) + return + + first = items[0] + + if isinstance(first, dict): + outputs = sorted( + items, + key=lambda x: self._hashable(x), + reverse=reverse, + ) + else: + outputs = sorted(items, reverse=reverse) + + self._set_outputs(outputs) def _drop_duplicates(self): seen = set() @@ -145,5 +161,6 @@ class ListOperations(ComponentBase,ABC): if isinstance(x, set): return tuple(sorted(self._hashable(v) for 
v in x)) return x + def thoughts(self) -> str: return "ListOperation in progress" From 68e3b33ae4b8043620d2ea901174ec75054b1a15 Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Fri, 14 Nov 2025 19:52:11 +0800 Subject: [PATCH 09/15] Feat: extract message output to file (#11251) ### What problem does this PR solve? Feat: extract message output to file ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- Dockerfile | 4 ++- agent/canvas.py | 4 +++ agent/component/message.py | 70 +++++++++++++++++++++++++++++++++++++- api/apps/document_app.py | 17 +++++++++ pyproject.toml | 1 + uv.lock | 10 ++++++ 6 files changed, 104 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index b16a0d7d5..239330183 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,7 +51,9 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \ apt install -y libjemalloc-dev && \ apt install -y python3-pip pipx nginx unzip curl wget git vim less && \ - apt install -y ghostscript + apt install -y ghostscript && \ + apt install -y pandoc && \ + apt install -y texlive RUN if [ "$NEED_MIRROR" == "1" ]; then \ pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ diff --git a/agent/canvas.py b/agent/canvas.py index bc7a45e3e..f262cd597 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -408,6 +408,10 @@ class Canvas(Graph): else: yield decorate("message", {"content": cpn_obj.output("content")}) cite = re.search(r"\[ID:[ 0-9]+\]", cpn_obj.output("content")) + + if isinstance(cpn_obj.output("attachment"), tuple): + yield decorate("message", {"attachment": cpn_obj.output("attachment")}) + yield decorate("message_end", {"reference": self.get_reference() if cite else None}) while partials: diff --git a/agent/component/message.py b/agent/component/message.py index 641198083..555534610 100644 --- a/agent/component/message.py +++ 
b/agent/component/message.py @@ -17,6 +17,9 @@ import json import os import random import re +import pypandoc +import logging +import tempfile from functools import partial from typing import Any @@ -24,7 +27,8 @@ from agent.component.base import ComponentBase, ComponentParamBase from jinja2 import Template as Jinja2Template from common.connection_utils import timeout - +from common.misc_utils import get_uuid +from common import settings class MessageParam(ComponentParamBase): """ @@ -34,6 +38,7 @@ class MessageParam(ComponentParamBase): super().__init__() self.content = [] self.stream = True + self.output_format = None # default output format self.outputs = { "content": { "type": "str" @@ -133,6 +138,7 @@ class Message(ComponentBase): yield rand_cnt[s: ] self.set_output("content", all_content) + self._convert_content(all_content) def _is_jinjia2(self, content:str) -> bool: patt = [ @@ -164,6 +170,68 @@ class Message(ComponentBase): content = re.sub(n, v, content) self.set_output("content", content) + self._convert_content(content) def thoughts(self) -> str: return "" + + def _convert_content(self, content): + doc_id = get_uuid() + + if self._param.output_format.lower() not in {"markdown", "html", "pdf", "docx"}: + self._param.output_format = "markdown" + + try: + if self._param.output_format in {"markdown", "html"}: + if isinstance(content, str): + converted = pypandoc.convert_text( + content, + to=self._param.output_format, + format="markdown", + ) + else: + converted = pypandoc.convert_file( + content, + to=self._param.output_format, + format="markdown", + ) + + binary_content = converted.encode("utf-8") + + else: # pdf, docx + with tempfile.NamedTemporaryFile(suffix=f".{self._param.output_format}", delete=False) as tmp: + tmp_name = tmp.name + + try: + if isinstance(content, str): + pypandoc.convert_text( + content, + to=self._param.output_format, + format="markdown", + outputfile=tmp_name, + ) + else: + pypandoc.convert_file( + content, + 
to=self._param.output_format, + format="markdown", + outputfile=tmp_name, + ) + + with open(tmp_name, "rb") as f: + binary_content = f.read() + + finally: + if os.path.exists(tmp_name): + os.remove(tmp_name) + + settings.STORAGE_IMPL.put(self._canvas._tenant_id, doc_id, binary_content) + self.set_output("attachment", { + "doc_id":doc_id, + "format":self._param.output_format, + "file_name":f"{doc_id[:8]}.{self._param.output_format}"}) + + logging.info(f"Converted content uploaded as {doc_id} (format={self._param.output_format})") + + except Exception as e: + logging.error(f"Error converting content to {self._param.output_format}: {e}") \ No newline at end of file diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 12c19f978..8cea336de 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -508,6 +508,7 @@ def get(doc_id): ext = ext.group(1) if ext else None if ext: if doc.type == FileType.VISUAL.value: + content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}") else: content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}") @@ -517,6 +518,22 @@ def get(doc_id): return server_error_response(e) +@manager.route("/download/", methods=["GET"]) # noqa: F821 +@login_required +def download_attachment(attachment_id): + try: + ext = request.args.get("ext", "markdown") + data = settings.STORAGE_IMPL.get(current_user.id, attachment_id) + # data = settings.STORAGE_IMPL.get("eb500d50bb0411f0907561d2782adda5", attachment_id) + response = flask.make_response(data) + response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}")) + + return response + + except Exception as e: + return server_error_response(e) + + @manager.route("/change_parser", methods=["POST"]) # noqa: F821 @login_required @validate_request("doc_id") diff --git a/pyproject.toml b/pyproject.toml index 2ec792b90..c1210dfb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,6 +145,7 @@ dependencies = [ "markdownify>=1.2.0", "captcha>=0.7.1", 
"pip>=25.2", + "pypandoc>=1.16", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index 166b34ce4..474ca510b 100644 --- a/uv.lock +++ b/uv.lock @@ -4892,6 +4892,14 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/28/2659c02301b9500751f8d42f9a6632e1508aa5120de5e43042b8b30f8d5d/pyopenssl-25.1.0-py3-none-any.whl", hash = "sha256:2b11f239acc47ac2e5aca04fd7fa829800aeee22a2eb30d744572a157bd8a1ab", size = 56771, upload-time = "2025-05-17T16:28:29.197Z" }, ] +[[package]] +name = "pypandoc" +version = "1.16" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/77/af1fc54740a0712988f9518e629d38edc7b8ffccd7549203f19c3d8a2db6/pypandoc-1.16-py3-none-any.whl", hash = "sha256:868f390d48388743e7a5885915cbbaa005dea36a825ecdfd571f8c523416c822", size = 19425, upload-time = "2025-11-08T15:44:38.429Z" }, +] + [[package]] name = "pyparsing" version = "3.2.3" @@ -5292,6 +5300,7 @@ dependencies = [ { name = "pyicu" }, { name = "pymysql" }, { name = "pyodbc" }, + { name = "pypandoc" }, { name = "pypdf" }, { name = "pypdf2" }, { name = "python-calamine" }, @@ -5447,6 +5456,7 @@ requires-dist = [ { name = "pyicu", specifier = ">=2.15.3,<3.0.0" }, { name = "pymysql", specifier = ">=1.1.1,<2.0.0" }, { name = "pyodbc", specifier = ">=5.2.0,<6.0.0" }, + { name = "pypandoc", specifier = ">=1.16" }, { name = "pypdf", specifier = "==6.0.0" }, { name = "pypdf2", specifier = ">=3.0.1,<4.0.0" }, { name = "python-calamine", specifier = ">=0.4.0" }, From b1a1eedf5382512ec9ec737abb17174006209026 Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Fri, 14 Nov 2025 19:52:58 +0800 Subject: [PATCH 10/15] Doc: add default username & pwd (#11283) ### What problem does this PR solve? 
Doc: add default username & pwd ### Type of change - [x] Documentation Update --------- Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> --- docs/guides/accessing_admin_ui.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/guides/accessing_admin_ui.md b/docs/guides/accessing_admin_ui.md index 52ff4d6c7..23521244b 100644 --- a/docs/guides/accessing_admin_ui.md +++ b/docs/guides/accessing_admin_ui.md @@ -12,6 +12,10 @@ The RAGFlow Admin UI is a web-based interface that provides comprehensive system To access the RAGFlow admin UI, append `/admin` to the web UI's address, e.g. `http://[RAGFLOW_WEB_UI_ADDR]/admin`, replace `[RAGFLOW_WEB_UI_ADDR]` with real RAGFlow web UI address. +### Default Credentials +| Username | Password | +|----------|----------| +| admin@ragflow.io | admin | ## Admin UI Overview From e841b09d631f2426067a2e45f25721e8d9ca9285 Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Fri, 14 Nov 2025 20:39:54 +0800 Subject: [PATCH 11/15] Remove unused code and fix performance issue (#11284) ### What problem does this PR solve? 1. remove redundant code 2. 
fix minor performance issue ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] Refactoring Signed-off-by: Jin Hai --- agent/canvas.py | 2 -- agent/component/base.py | 11 +++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/agent/canvas.py b/agent/canvas.py index f262cd597..e18cb8d26 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -298,8 +298,6 @@ class Canvas(Graph): for kk, vv in kwargs["webhook_payload"].items(): self.components[k]["obj"].set_output(kk, vv) - self.components[k]["obj"].reset(True) - for k in kwargs.keys(): if k in ["query", "user_id", "files"] and kwargs[k]: if k == "files": diff --git a/agent/component/base.py b/agent/component/base.py index 31ad46820..0864ccb9e 100644 --- a/agent/component/base.py +++ b/agent/component/base.py @@ -463,12 +463,15 @@ class ComponentBase(ABC): return self._param.outputs.get("_ERROR", {}).get("value") def reset(self, only_output=False): - for k in self._param.outputs.keys(): - self._param.outputs[k]["value"] = None + outputs: dict = self._param.outputs # for better performance + for k in outputs.keys(): + outputs[k]["value"] = None if only_output: return - for k in self._param.inputs.keys(): - self._param.inputs[k]["value"] = None + + inputs: dict = self._param.inputs # for better performance + for k in inputs.keys(): + inputs[k]["value"] = None self._param.debug_inputs = {} def get_input(self, key: str=None) -> Union[Any, dict[str, Any]]: From 61cf430dbb8507ee7d53cc5fe35a23ee8e271e55 Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Sun, 16 Nov 2025 19:29:20 +0800 Subject: [PATCH 12/15] Minor tweaks (#11271) ### What problem does this PR solve? As title. 
### Type of change - [x] Refactoring --------- Signed-off-by: Jin Hai --- api/db/db_models.py | 5 +++-- api/db/services/connector_service.py | 6 ++--- api/db/services/dialog_service.py | 8 ++++--- api/db/services/document_service.py | 28 ++++++++++++------------ api/db/services/knowledgebase_service.py | 1 + api/utils/email_templates.py | 16 ++++++++++++++ api/utils/json_encode.py | 16 ++++++++++++++ 7 files changed, 58 insertions(+), 22 deletions(-) diff --git a/api/db/db_models.py b/api/db/db_models.py index 68bf37ce4..2b4c4a0ef 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -305,6 +305,7 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase): time.sleep(self.retry_delay * (2 ** attempt)) else: raise + return None class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase): @@ -772,7 +773,7 @@ class Document(DataBaseModel): thumbnail = TextField(null=True, help_text="thumbnail base64 string") kb_id = CharField(max_length=256, null=False, index=True) parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True) - pipeline_id = CharField(max_length=32, null=True, help_text="pipleline ID", index=True) + pipeline_id = CharField(max_length=32, null=True, help_text="pipeline ID", index=True) parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]}) source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True) type = CharField(max_length=32, null=False, help_text="file extension", index=True) @@ -876,7 +877,7 @@ class Dialog(DataBaseModel): class Conversation(DataBaseModel): id = CharField(max_length=32, primary_key=True) dialog_id = CharField(max_length=32, null=False, index=True) - name = CharField(max_length=255, null=True, help_text="converastion name", index=True) + name = CharField(max_length=255, null=True, help_text="conversation name", index=True) message = JSONField(null=True) reference = JSONField(null=True, default=[]) 
user_id = CharField(max_length=255, null=True, help_text="user_id", index=True) diff --git a/api/db/services/connector_service.py b/api/db/services/connector_service.py index 3e65c87da..2f29c3324 100644 --- a/api/db/services/connector_service.py +++ b/api/db/services/connector_service.py @@ -70,7 +70,7 @@ class ConnectorService(CommonService): def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str): e, conn = cls.get_by_id(connector_id) if not e: - return + return None SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id]) docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}", kb_id=kb_id) err = FileService.delete_docs([d.id for d in docs], tenant_id) @@ -125,11 +125,11 @@ class SyncLogsService(CommonService): ) query = query.distinct().order_by(cls.model.update_time.desc()) - totbal = query.count() + total = query.count() if page_number: query = query.paginate(page_number, items_per_page) - return list(query.dicts()), totbal + return list(query.dicts()), total @classmethod def start(cls, id, connector_id): diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index f54ebf709..d2f3b9bc1 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -342,7 +342,7 @@ def chat(dialog, messages, stream=True, **kwargs): if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"): for ans in chat_solo(dialog, messages, stream): yield ans - return + return None chat_start_ts = timer() @@ -386,7 +386,7 @@ def chat(dialog, messages, stream=True, **kwargs): ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True), dialog.kb_ids) if ans: yield ans - return + return None for p in prompt_config["parameters"]: if p["key"] == "knowledge": @@ -617,6 +617,8 @@ def chat(dialog, messages, stream=True, **kwargs): res["audio_binary"] = tts(tts_mdl, answer) yield res + return None + def use_sql(question, field_map, 
tenant_id, chat_mdl, quota=True, kb_ids=None): sys_prompt = """ @@ -745,7 +747,7 @@ Please write the SQL, only SQL, without any other explanations or text. def tts(tts_mdl, text): if not tts_mdl or not text: - return + return None bin = b"" for chunk in tts_mdl.tts(text): bin += chunk diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 530133164..0abf1b1f3 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -113,7 +113,7 @@ class DocumentService(CommonService): def check_doc_health(cls, tenant_id: str, filename): import os MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0)) - if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(tenant_id) >= MAX_FILE_NUM_PER_USER: + if 0 < MAX_FILE_NUM_PER_USER <= DocumentService.get_doc_count(tenant_id): raise RuntimeError("Exceed the maximum file number of a free user!") if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: raise RuntimeError("Exceed the maximum length of file name!") @@ -464,7 +464,7 @@ class DocumentService(CommonService): cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) docs = docs.dicts() if not docs: - return + return None return docs[0]["tenant_id"] @classmethod @@ -473,7 +473,7 @@ class DocumentService(CommonService): docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id) docs = docs.dicts() if not docs: - return + return None return docs[0]["kb_id"] @classmethod @@ -486,7 +486,7 @@ class DocumentService(CommonService): cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value) docs = docs.dicts() if not docs: - return + return None return docs[0]["tenant_id"] @classmethod @@ -533,7 +533,7 @@ class DocumentService(CommonService): cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) docs = docs.dicts() if not docs: - return + return None return docs[0]["embd_id"] @classmethod @@ -569,7 +569,7 @@ class 
DocumentService(CommonService): .where(cls.model.name == doc_name) doc_id = doc_id.dicts() if not doc_id: - return + return None return doc_id[0]["id"] @classmethod @@ -715,7 +715,7 @@ class DocumentService(CommonService): prg = 1 status = TaskStatus.DONE.value - # only for special task and parsed docs and unfinised + # only for special task and parsed docs and unfinished freeze_progress = special_task_running and doc_progress >= 1 and not finished msg = "\n".join(sorted(msg)) info = { @@ -974,13 +974,13 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): def embedding(doc_id, cnts, batch_size=16): nonlocal embd_mdl, chunk_counts, token_counts - vects = [] + vectors = [] for i in range(0, len(cnts), batch_size): vts, c = embd_mdl.encode(cnts[i: i + batch_size]) - vects.extend(vts.tolist()) + vectors.extend(vts.tolist()) chunk_counts[doc_id] += len(cnts[i:i + batch_size]) token_counts[doc_id] += c - return vects + return vectors idxnm = search.index_name(kb.tenant_id) try_create_idx = True @@ -1011,15 +1011,15 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): except Exception: logging.exception("Mind map generation error") - vects = embedding(doc_id, [c["content_with_weight"] for c in cks]) - assert len(cks) == len(vects) + vectors = embedding(doc_id, [c["content_with_weight"] for c in cks]) + assert len(cks) == len(vectors) for i, d in enumerate(cks): - v = vects[i] + v = vectors[i] d["q_%d_vec" % len(v)] = v for b in range(0, len(cks), es_bulk_size): if try_create_idx: if not settings.docStoreConn.indexExist(idxnm, kb_id): - settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0])) + settings.docStoreConn.createIdx(idxnm, kb_id, len(vectors[0])) try_create_idx = False settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id) diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 03179da49..ca30ca074 100644 --- a/api/db/services/knowledgebase_service.py +++ 
b/api/db/services/knowledgebase_service.py @@ -424,6 +424,7 @@ class KnowledgebaseService(CommonService): # Default parser_config (align with kb_app.create) — do not accept external overrides payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config")) + return payload diff --git a/api/utils/email_templates.py b/api/utils/email_templates.py index 10473908a..34201ee38 100644 --- a/api/utils/email_templates.py +++ b/api/utils/email_templates.py @@ -1,3 +1,19 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """ Reusable HTML email templates and registry. """ diff --git a/api/utils/json_encode.py b/api/utils/json_encode.py index b21addd4f..fa5ea973a 100644 --- a/api/utils/json_encode.py +++ b/api/utils/json_encode.py @@ -1,3 +1,19 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + import datetime import json from enum import Enum, IntEnum From 13e212c8561dda5a8f2cc31df64eea5354abbc4f Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Mon, 17 Nov 2025 09:38:04 +0800 Subject: [PATCH 13/15] Feat: add Jira connector (#11285) ### What problem does this PR solve? Add Jira connector. image --- image --- image --- image ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- common/data_source/__init__.py | 2 +- common/data_source/config.py | 17 + common/data_source/confluence_connector.py | 1 + common/data_source/jira/__init__.py | 0 common/data_source/jira/connector.py | 973 ++++++++++++++++++ common/data_source/jira/utils.py | 149 +++ common/data_source/jira_connector.py | 112 -- common/data_source/utils.py | 40 +- common/log_utils.py | 2 +- rag/svr/sync_data_source.py | 208 ++-- web/src/assets/svg/data-source/jira.svg | 16 + web/src/locales/en.ts | 27 + web/src/locales/zh.ts | 17 + .../user-setting/data-source/contant.tsx | 130 ++- .../pages/user-setting/data-source/index.tsx | 6 + 15 files changed, 1521 insertions(+), 179 deletions(-) create mode 100644 common/data_source/jira/__init__.py create mode 100644 common/data_source/jira/connector.py create mode 100644 common/data_source/jira/utils.py delete mode 100644 common/data_source/jira_connector.py create mode 100644 web/src/assets/svg/data-source/jira.svg diff --git a/common/data_source/__init__.py b/common/data_source/__init__.py index 0802a5285..611c3c61a 100644 --- a/common/data_source/__init__.py +++ b/common/data_source/__init__.py @@ -11,7 +11,7 @@ from .confluence_connector import ConfluenceConnector from .discord_connector import DiscordConnector from .dropbox_connector import DropboxConnector from .google_drive.connector import GoogleDriveConnector -from .jira_connector import JiraConnector +from .jira.connector import JiraConnector from .sharepoint_connector import SharePointConnector from .teams_connector import TeamsConnector from .config import 
BlobType, DocumentSource diff --git a/common/data_source/config.py b/common/data_source/config.py index 02684dbac..e4040f85e 100644 --- a/common/data_source/config.py +++ b/common/data_source/config.py @@ -13,6 +13,7 @@ def get_current_tz_offset() -> int: return round(time_diff.total_seconds() / 3600) +ONE_MINUTE = 60 ONE_HOUR = 3600 ONE_DAY = ONE_HOUR * 24 @@ -42,6 +43,7 @@ class DocumentSource(str, Enum): OCI_STORAGE = "oci_storage" SLACK = "slack" CONFLUENCE = "confluence" + JIRA = "jira" GOOGLE_DRIVE = "google_drive" GMAIL = "gmail" DISCORD = "discord" @@ -178,6 +180,21 @@ GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD = int( os.environ.get("GOOGLE_DRIVE_CONNECTOR_SIZE_THRESHOLD", 10 * 1024 * 1024) ) +JIRA_CONNECTOR_LABELS_TO_SKIP = [ + ignored_tag + for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",") + if ignored_tag +] +JIRA_CONNECTOR_MAX_TICKET_SIZE = int( + os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024) +) +JIRA_SYNC_TIME_BUFFER_SECONDS = int( + os.environ.get("JIRA_SYNC_TIME_BUFFER_SECONDS", ONE_MINUTE) +) +JIRA_TIMEZONE_OFFSET = float( + os.environ.get("JIRA_TIMEZONE_OFFSET", get_current_tz_offset()) +) + OAUTH_SLACK_CLIENT_ID = os.environ.get("OAUTH_SLACK_CLIENT_ID", "") OAUTH_SLACK_CLIENT_SECRET = os.environ.get("OAUTH_SLACK_CLIENT_SECRET", "") OAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get( diff --git a/common/data_source/confluence_connector.py b/common/data_source/confluence_connector.py index aed16ad2b..821f79862 100644 --- a/common/data_source/confluence_connector.py +++ b/common/data_source/confluence_connector.py @@ -1788,6 +1788,7 @@ class ConfluenceConnector( cql_url = self.confluence_client.build_cql_url( page_query, expand=",".join(_PAGE_EXPANSION_FIELDS) ) + logging.info(f"[Confluence Connector] Building CQL URL {cql_url}") return update_param_in_path(cql_url, "limit", str(limit)) @override diff --git a/common/data_source/jira/__init__.py b/common/data_source/jira/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/common/data_source/jira/connector.py b/common/data_source/jira/connector.py new file mode 100644 index 000000000..4635d72f3 --- /dev/null +++ b/common/data_source/jira/connector.py @@ -0,0 +1,973 @@ +"""Checkpointed Jira connector that emits markdown blobs for each issue.""" + +from __future__ import annotations + +import argparse +import copy +import logging +import os +import re +from collections.abc import Callable, Generator, Iterable, Iterator, Sequence +from datetime import datetime, timedelta, timezone +from typing import Any +from zoneinfo import ZoneInfo, ZoneInfoNotFoundError + +from jira import JIRA +from jira.resources import Issue +from pydantic import Field + +from common.data_source.config import ( + INDEX_BATCH_SIZE, + JIRA_CONNECTOR_LABELS_TO_SKIP, + JIRA_CONNECTOR_MAX_TICKET_SIZE, + JIRA_TIMEZONE_OFFSET, + ONE_HOUR, + DocumentSource, +) +from common.data_source.exceptions import ( + ConnectorMissingCredentialError, + ConnectorValidationError, + InsufficientPermissionsError, + UnexpectedValidationError, +) +from common.data_source.interfaces import ( + CheckpointedConnectorWithPermSync, + CheckpointOutputWrapper, + SecondsSinceUnixEpoch, + SlimConnectorWithPermSync, +) +from common.data_source.jira.utils import ( + JIRA_CLOUD_API_VERSION, + JIRA_SERVER_API_VERSION, + build_issue_url, + extract_body_text, + extract_named_value, + extract_user, + format_attachments, + format_comments, + parse_jira_datetime, + should_skip_issue, +) +from common.data_source.models import ( + ConnectorCheckpoint, + ConnectorFailure, + Document, + DocumentFailure, + SlimDocument, +) +from common.data_source.utils import is_atlassian_cloud_url, is_atlassian_date_error, scoped_url + +logger = logging.getLogger(__name__) + +_DEFAULT_FIELDS = "summary,description,updated,created,status,priority,assignee,reporter,labels,issuetype,project,comment,attachment" +_SLIM_FIELDS = "key,project" +_MAX_RESULTS_FETCH_IDS = 5000 +_JIRA_SLIM_PAGE_SIZE = 
500 +_JIRA_FULL_PAGE_SIZE = 50 +_DEFAULT_ATTACHMENT_SIZE_LIMIT = 10 * 1024 * 1024 # 10MB + + +class JiraCheckpoint(ConnectorCheckpoint): + """Checkpoint that tracks which slice of the current JQL result set was emitted.""" + + start_at: int = 0 + cursor: str | None = None + ids_done: bool = False + all_issue_ids: list[list[str]] = Field(default_factory=list) + + +_TZ_OFFSET_PATTERN = re.compile(r"([+-])(\d{2})(:?)(\d{2})$") + + +class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync): + """Retrieve Jira issues and emit them as markdown documents.""" + + def __init__( + self, + jira_base_url: str, + project_key: str | None = None, + jql_query: str | None = None, + batch_size: int = INDEX_BATCH_SIZE, + include_comments: bool = True, + include_attachments: bool = False, + labels_to_skip: Sequence[str] | None = None, + comment_email_blacklist: Sequence[str] | None = None, + scoped_token: bool = False, + attachment_size_limit: int | None = None, + timezone_offset: float | None = None, + ) -> None: + if not jira_base_url: + raise ConnectorValidationError("Jira base URL must be provided.") + + self.jira_base_url = jira_base_url.rstrip("/") + self.project_key = project_key + self.jql_query = jql_query + self.batch_size = batch_size + self.include_comments = include_comments + self.include_attachments = include_attachments + configured_labels = labels_to_skip or JIRA_CONNECTOR_LABELS_TO_SKIP + self.labels_to_skip = {label.lower() for label in configured_labels} + self.comment_email_blacklist = {email.lower() for email in comment_email_blacklist or []} + self.scoped_token = scoped_token + self.jira_client: JIRA | None = None + + self.max_ticket_size = JIRA_CONNECTOR_MAX_TICKET_SIZE + self.attachment_size_limit = attachment_size_limit if attachment_size_limit and attachment_size_limit > 0 else _DEFAULT_ATTACHMENT_SIZE_LIMIT + self._fields_param = _DEFAULT_FIELDS + self._slim_fields = _SLIM_FIELDS + + tz_offset_value = float(timezone_offset) if 
timezone_offset is not None else float(JIRA_TIMEZONE_OFFSET) + self.timezone_offset = tz_offset_value + self.timezone = timezone(offset=timedelta(hours=tz_offset_value)) + self._timezone_overridden = timezone_offset is not None + + # ------------------------------------------------------------------------- + # Connector lifecycle helpers + # ------------------------------------------------------------------------- + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + """Instantiate the Jira client using either an API token or username/password.""" + jira_url_for_client = self.jira_base_url + if self.scoped_token: + if is_atlassian_cloud_url(self.jira_base_url): + try: + jira_url_for_client = scoped_url(self.jira_base_url, "jira") + except ValueError as exc: + raise ConnectorValidationError(str(exc)) from exc + else: + logger.warning(f"[Jira] Scoped token requested but Jira base URL {self.jira_base_url} does not appear to be an Atlassian Cloud domain; scoped token ignored.") + + user_email = credentials.get("jira_user_email") or credentials.get("username") + api_token = credentials.get("jira_api_token") or credentials.get("token") or credentials.get("api_token") + password = credentials.get("jira_password") or credentials.get("password") + rest_api_version = credentials.get("rest_api_version") + + if not rest_api_version: + rest_api_version = JIRA_CLOUD_API_VERSION if api_token else JIRA_SERVER_API_VERSION + options: dict[str, Any] = {"rest_api_version": rest_api_version} + + try: + if user_email and api_token: + self.jira_client = JIRA( + server=jira_url_for_client, + basic_auth=(user_email, api_token), + options=options, + ) + elif api_token: + self.jira_client = JIRA( + server=jira_url_for_client, + token_auth=api_token, + options=options, + ) + elif user_email and password: + self.jira_client = JIRA( + server=jira_url_for_client, + basic_auth=(user_email, password), + options=options, + ) + else: + raise 
ConnectorMissingCredentialError("Jira credentials must include either an API token or username/password.") + except Exception as exc: # pragma: no cover - jira lib raises many types + raise ConnectorMissingCredentialError(f"Jira: {exc}") from exc + self._sync_timezone_from_server() + return None + + def validate_connector_settings(self) -> None: + """Validate connectivity by fetching basic Jira info.""" + if not self.jira_client: + raise ConnectorMissingCredentialError("Jira") + + try: + if self.jql_query: + dummy_checkpoint = self.build_dummy_checkpoint() + checkpoint_callback = self._make_checkpoint_callback(dummy_checkpoint) + iterator = self._perform_jql_search( + jql=self.jql_query, + start=0, + max_results=1, + fields="key", + all_issue_ids=dummy_checkpoint.all_issue_ids, + checkpoint_callback=checkpoint_callback, + next_page_token=dummy_checkpoint.cursor, + ids_done=dummy_checkpoint.ids_done, + ) + next(iter(iterator), None) + elif self.project_key: + self.jira_client.project(self.project_key) + else: + self.jira_client.projects() + except Exception as exc: # pragma: no cover - dependent on Jira responses + self._handle_validation_error(exc) + + # ------------------------------------------------------------------------- + # Checkpointed connector implementation + # ------------------------------------------------------------------------- + + def load_from_checkpoint( + self, + start: SecondsSinceUnixEpoch, + end: SecondsSinceUnixEpoch, + checkpoint: JiraCheckpoint, + ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]: + """Load Jira issues, emitting a Document per issue.""" + try: + return (yield from self._load_with_retry(start, end, checkpoint)) + except Exception as exc: + logger.exception(f"[Jira] Jira query ultimately failed: {exc}") + yield ConnectorFailure( + failure_message=f"Failed to query Jira: {exc}", + exception=exc, + ) + return JiraCheckpoint(has_more=False, start_at=checkpoint.start_at) + + def 
load_from_checkpoint_with_perm_sync( + self, + start: SecondsSinceUnixEpoch, + end: SecondsSinceUnixEpoch, + checkpoint: JiraCheckpoint, + ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]: + """Permissions are not synced separately, so reuse the standard loader.""" + return (yield from self.load_from_checkpoint(start=start, end=end, checkpoint=checkpoint)) + + def _load_with_retry( + self, + start: SecondsSinceUnixEpoch, + end: SecondsSinceUnixEpoch, + checkpoint: JiraCheckpoint, + ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]: + if not self.jira_client: + raise ConnectorMissingCredentialError("Jira") + + attempt_start = start + retried_with_buffer = False + attempt = 0 + + while True: + attempt += 1 + jql = self._build_jql(attempt_start, end) + logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (start={attempt_start}, end={end}, buffered_retry={retried_with_buffer}): {jql}") + try: + return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start)) + except Exception as exc: + if attempt_start is not None and not retried_with_buffer and is_atlassian_date_error(exc): + attempt_start = attempt_start - ONE_HOUR + retried_with_buffer = True + logger.info(f"[Jira] Atlassian date error detected; retrying with start={attempt_start}.") + continue + raise + + def _handle_validation_error(self, exc: Exception) -> None: + status_code = getattr(exc, "status_code", None) + if status_code == 401: + raise InsufficientPermissionsError("Jira credential appears to be invalid or expired (HTTP 401).") from exc + if status_code == 403: + raise InsufficientPermissionsError("Jira token does not have permission to access the requested resources (HTTP 403).") from exc + if status_code == 404: + raise ConnectorValidationError("Jira resource not found (HTTP 404).") from exc + if status_code == 429: + raise ConnectorValidationError("Jira rate limit exceeded during validation (HTTP 429).") from exc + + message = 
getattr(exc, "text", str(exc)) + if not message: + raise UnexpectedValidationError("Unexpected Jira validation error.") from exc + + raise ConnectorValidationError(f"Jira validation failed: {message}") from exc + + def _load_from_checkpoint_internal( + self, + jql: str, + checkpoint: JiraCheckpoint, + start_filter: SecondsSinceUnixEpoch | None = None, + ) -> Generator[Document | ConnectorFailure, None, JiraCheckpoint]: + assert self.jira_client, "load_credentials must be called before loading issues." + + page_size = self._full_page_size() + new_checkpoint = copy.deepcopy(checkpoint) + starting_offset = new_checkpoint.start_at or 0 + current_offset = starting_offset + checkpoint_callback = self._make_checkpoint_callback(new_checkpoint) + + issue_iter = self._perform_jql_search( + jql=jql, + start=current_offset, + max_results=page_size, + fields=self._fields_param, + all_issue_ids=new_checkpoint.all_issue_ids, + checkpoint_callback=checkpoint_callback, + next_page_token=new_checkpoint.cursor, + ids_done=new_checkpoint.ids_done, + ) + + start_cutoff = float(start_filter) if start_filter is not None else None + + for issue in issue_iter: + current_offset += 1 + issue_key = getattr(issue, "key", "unknown") + if should_skip_issue(issue, self.labels_to_skip): + continue + + issue_updated = parse_jira_datetime(issue.raw.get("fields", {}).get("updated")) + if start_cutoff is not None and issue_updated is not None and issue_updated.timestamp() <= start_cutoff: + # Jira JQL only supports minute precision, so we discard already-processed + # issues here based on the original second-level cutoff. 
+ continue + + try: + document = self._issue_to_document(issue) + except Exception as exc: # pragma: no cover - defensive + logger.exception(f"[Jira] Failed to convert Jira issue {issue_key}: {exc}") + yield ConnectorFailure( + failure_message=f"Failed to convert Jira issue {issue_key}: {exc}", + failed_document=DocumentFailure( + document_id=issue_key, + document_link=build_issue_url(self.jira_base_url, issue_key), + ), + exception=exc, + ) + continue + + if document is not None: + yield document + if self.include_attachments: + for attachment_document in self._attachment_documents(issue): + if attachment_document is not None: + yield attachment_document + + self._update_checkpoint_for_next_run( + checkpoint=new_checkpoint, + current_offset=current_offset, + starting_offset=starting_offset, + page_size=page_size, + ) + new_checkpoint.start_at = current_offset + return new_checkpoint + + def build_dummy_checkpoint(self) -> JiraCheckpoint: + """Create an empty checkpoint used to kick off ingestion.""" + return JiraCheckpoint(has_more=True, start_at=0) + + def validate_checkpoint_json(self, checkpoint_json: str) -> JiraCheckpoint: + """Validate a serialized checkpoint.""" + return JiraCheckpoint.model_validate_json(checkpoint_json) + + # ------------------------------------------------------------------------- + # Slim connector implementation + # ------------------------------------------------------------------------- + + def retrieve_all_slim_docs_perm_sync( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + callback: Any = None, # noqa: ARG002 - maintained for interface compatibility + ) -> Generator[list[SlimDocument], None, None]: + """Return lightweight references to Jira issues (used for permission syncing).""" + if not self.jira_client: + raise ConnectorMissingCredentialError("Jira") + + start_ts = start if start is not None else 0 + end_ts = end if end is not None else datetime.now(timezone.utc).timestamp() + 
jql = self._build_jql(start_ts, end_ts) + + checkpoint = self.build_dummy_checkpoint() + checkpoint_callback = self._make_checkpoint_callback(checkpoint) + prev_offset = 0 + current_offset = 0 + slim_batch: list[SlimDocument] = [] + + while checkpoint.has_more: + for issue in self._perform_jql_search( + jql=jql, + start=current_offset, + max_results=_JIRA_SLIM_PAGE_SIZE, + fields=self._slim_fields, + all_issue_ids=checkpoint.all_issue_ids, + checkpoint_callback=checkpoint_callback, + next_page_token=checkpoint.cursor, + ids_done=checkpoint.ids_done, + ): + current_offset += 1 + if should_skip_issue(issue, self.labels_to_skip): + continue + + doc_id = build_issue_url(self.jira_base_url, issue.key) + slim_batch.append(SlimDocument(id=doc_id)) + + if len(slim_batch) >= _JIRA_SLIM_PAGE_SIZE: + yield slim_batch + slim_batch = [] + + self._update_checkpoint_for_next_run( + checkpoint=checkpoint, + current_offset=current_offset, + starting_offset=prev_offset, + page_size=_JIRA_SLIM_PAGE_SIZE, + ) + prev_offset = current_offset + + if slim_batch: + yield slim_batch + + # ------------------------------------------------------------------------- + # Internal helpers + # ------------------------------------------------------------------------- + + def _build_jql(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> str: + clauses: list[str] = [] + if self.jql_query: + clauses.append(f"({self.jql_query})") + elif self.project_key: + clauses.append(f'project = "{self.project_key}"') + else: + raise ConnectorValidationError("Either project_key or jql_query must be provided for Jira connector.") + + if self.labels_to_skip: + labels = ", ".join(f'"{label}"' for label in self.labels_to_skip) + clauses.append(f"labels NOT IN ({labels})") + + if start is not None: + clauses.append(f'updated >= "{self._format_jql_time(start)}"') + if end is not None: + clauses.append(f'updated <= "{self._format_jql_time(end)}"') + + if not clauses: + raise ConnectorValidationError("Unable 
to build Jira JQL query.") + + jql = " AND ".join(clauses) + if "order by" not in jql.lower(): + jql = f"{jql} ORDER BY updated ASC" + return jql + + def _format_jql_time(self, timestamp: SecondsSinceUnixEpoch) -> str: + dt_utc = datetime.fromtimestamp(float(timestamp), tz=timezone.utc) + dt_local = dt_utc.astimezone(self.timezone) + # Jira only accepts minute-precision timestamps in JQL, so we format accordingly + # and rely on a post-query second-level filter to avoid duplicates. + return dt_local.strftime("%Y-%m-%d %H:%M") + + def _issue_to_document(self, issue: Issue) -> Document | None: + fields = issue.raw.get("fields", {}) + summary = fields.get("summary") or "" + description_text = extract_body_text(fields.get("description")) + comments_text = ( + format_comments( + fields.get("comment"), + blacklist=self.comment_email_blacklist, + ) + if self.include_comments + else "" + ) + attachments_text = format_attachments(fields.get("attachment")) + + reporter_name, reporter_email = extract_user(fields.get("reporter")) + assignee_name, assignee_email = extract_user(fields.get("assignee")) + status = extract_named_value(fields.get("status")) + priority = extract_named_value(fields.get("priority")) + issue_type = extract_named_value(fields.get("issuetype")) + project = fields.get("project") or {} + + issue_url = build_issue_url(self.jira_base_url, issue.key) + + metadata_lines = [ + f"key: {issue.key}", + f"url: {issue_url}", + f"summary: {summary}", + f"status: {status or 'Unknown'}", + f"priority: {priority or 'Unspecified'}", + f"issue_type: {issue_type or 'Unknown'}", + f"project: {project.get('name') or ''}", + f"project_key: {project.get('key') or self.project_key or ''}", + ] + + if reporter_name: + metadata_lines.append(f"reporter: {reporter_name}") + if reporter_email: + metadata_lines.append(f"reporter_email: {reporter_email}") + if assignee_name: + metadata_lines.append(f"assignee: {assignee_name}") + if assignee_email: + 
metadata_lines.append(f"assignee_email: {assignee_email}") + if fields.get("labels"): + metadata_lines.append(f"labels: {', '.join(fields.get('labels'))}") + + created_dt = parse_jira_datetime(fields.get("created")) + updated_dt = parse_jira_datetime(fields.get("updated")) or created_dt or datetime.now(timezone.utc) + metadata_lines.append(f"created: {created_dt.isoformat() if created_dt else ''}") + metadata_lines.append(f"updated: {updated_dt.isoformat() if updated_dt else ''}") + + sections: list[str] = [ + "---", + "\n".join(filter(None, metadata_lines)), + "---", + "", + "## Description", + description_text or "No description provided.", + ] + + if comments_text: + sections.extend(["", "## Comments", comments_text]) + if attachments_text: + sections.extend(["", "## Attachments", attachments_text]) + + blob_text = "\n".join(sections).strip() + "\n" + blob = blob_text.encode("utf-8") + + if len(blob) > self.max_ticket_size: + logger.info(f"[Jira] Skipping {issue.key} because it exceeds the maximum size of {self.max_ticket_size} bytes.") + return None + + semantic_identifier = f"{issue.key}: {summary}" if summary else issue.key + + return Document( + id=issue_url, + source=DocumentSource.JIRA, + semantic_identifier=semantic_identifier, + extension=".md", + blob=blob, + doc_updated_at=updated_dt, + size_bytes=len(blob), + ) + + def _attachment_documents(self, issue: Issue) -> Iterable[Document]: + attachments = issue.raw.get("fields", {}).get("attachment") or [] + for attachment in attachments: + try: + document = self._attachment_to_document(issue, attachment) + if document is not None: + yield document + except Exception as exc: # pragma: no cover - defensive + failed_id = attachment.get("id") or attachment.get("filename") + issue_key = getattr(issue, "key", "unknown") + logger.warning(f"[Jira] Failed to process attachment {failed_id} for issue {issue_key}: {exc}") + + def _attachment_to_document(self, issue: Issue, attachment: dict[str, Any]) -> Document | 
None: + if not self.include_attachments: + return None + + filename = attachment.get("filename") + content_url = attachment.get("content") + if not filename or not content_url: + return None + + try: + attachment_size = int(attachment.get("size", 0)) + except (TypeError, ValueError): + attachment_size = 0 + if attachment_size and attachment_size > self.attachment_size_limit: + logger.info(f"[Jira] Skipping attachment {filename} on {issue.key} because reported size exceeds limit ({self.attachment_size_limit} bytes).") + return None + + blob = self._download_attachment(content_url) + if blob is None: + return None + + if len(blob) > self.attachment_size_limit: + logger.info(f"[Jira] Skipping attachment {filename} on {issue.key} because it exceeds the size limit ({self.attachment_size_limit} bytes).") + return None + + attachment_time = parse_jira_datetime(attachment.get("created")) or parse_jira_datetime(attachment.get("updated")) + updated_dt = attachment_time or parse_jira_datetime(issue.raw.get("fields", {}).get("updated")) or datetime.now(timezone.utc) + + extension = os.path.splitext(filename)[1] or "" + document_id = f"{issue.key}::attachment::{attachment.get('id') or filename}" + semantic_identifier = f"{issue.key} attachment: {filename}" + + return Document( + id=document_id, + source=DocumentSource.JIRA, + semantic_identifier=semantic_identifier, + extension=extension, + blob=blob, + doc_updated_at=updated_dt, + size_bytes=len(blob), + ) + + def _download_attachment(self, url: str) -> bytes | None: + if not self.jira_client: + raise ConnectorMissingCredentialError("Jira") + response = self.jira_client._session.get(url) + response.raise_for_status() + return response.content + + def _sync_timezone_from_server(self) -> None: + if self._timezone_overridden or not self.jira_client: + return + try: + server_info = self.jira_client.server_info() + except Exception as exc: # pragma: no cover - defensive + logger.info(f"[Jira] Unable to determine timezone from 
server info; continuing with offset {self.timezone_offset}. Error: {exc}") + return + + detected_offset = self._extract_timezone_offset(server_info) + if detected_offset is None or detected_offset == self.timezone_offset: + return + + self.timezone_offset = detected_offset + self.timezone = timezone(offset=timedelta(hours=detected_offset)) + logger.info(f"[Jira] Timezone offset adjusted to {detected_offset} hours using Jira server info.") + + def _extract_timezone_offset(self, server_info: dict[str, Any]) -> float | None: + server_time_raw = server_info.get("serverTime") + if isinstance(server_time_raw, str): + offset = self._parse_offset_from_datetime_string(server_time_raw) + if offset is not None: + return offset + + tz_name = server_info.get("timeZone") + if isinstance(tz_name, str): + offset = self._offset_from_zone_name(tz_name) + if offset is not None: + return offset + return None + + @staticmethod + def _parse_offset_from_datetime_string(value: str) -> float | None: + normalized = JiraConnector._normalize_datetime_string(value) + try: + dt = datetime.fromisoformat(normalized) + except ValueError: + return None + + if dt.tzinfo is None: + return 0.0 + + offset = dt.tzinfo.utcoffset(dt) + if offset is None: + return None + return offset.total_seconds() / 3600.0 + + @staticmethod + def _normalize_datetime_string(value: str) -> str: + trimmed = (value or "").strip() + if trimmed.endswith("Z"): + return f"{trimmed[:-1]}+00:00" + + match = _TZ_OFFSET_PATTERN.search(trimmed) + if match and match.group(3) != ":": + sign, hours, _, minutes = match.groups() + trimmed = f"{trimmed[: match.start()]}{sign}{hours}:{minutes}" + return trimmed + + @staticmethod + def _offset_from_zone_name(name: str) -> float | None: + try: + tz = ZoneInfo(name) + except (ZoneInfoNotFoundError, ValueError): + return None + reference = datetime.now(tz) + offset = reference.utcoffset() + if offset is None: + return None + return offset.total_seconds() / 3600.0 + + def _is_cloud_client(self) 
-> bool: + if not self.jira_client: + return False + rest_version = str(self.jira_client._options.get("rest_api_version", "")).strip() + return rest_version == str(JIRA_CLOUD_API_VERSION) + + def _full_page_size(self) -> int: + return max(1, min(self.batch_size, _JIRA_FULL_PAGE_SIZE)) + + def _perform_jql_search( + self, + *, + jql: str, + start: int, + max_results: int, + fields: str | None = None, + all_issue_ids: list[list[str]] | None = None, + checkpoint_callback: Callable[[Iterable[list[str]], str | None], None] | None = None, + next_page_token: str | None = None, + ids_done: bool = False, + ) -> Iterable[Issue]: + assert self.jira_client, "Jira client not initialized." + is_cloud = self._is_cloud_client() + if is_cloud: + if all_issue_ids is None: + raise ValueError("all_issue_ids is required for Jira Cloud searches.") + yield from self._perform_jql_search_v3( + jql=jql, + max_results=max_results, + fields=fields, + all_issue_ids=all_issue_ids, + checkpoint_callback=checkpoint_callback, + next_page_token=next_page_token, + ids_done=ids_done, + ) + else: + yield from self._perform_jql_search_v2( + jql=jql, + start=start, + max_results=max_results, + fields=fields, + ) + + def _perform_jql_search_v3( + self, + *, + jql: str, + max_results: int, + all_issue_ids: list[list[str]], + fields: str | None = None, + checkpoint_callback: Callable[[Iterable[list[str]], str | None], None] | None = None, + next_page_token: str | None = None, + ids_done: bool = False, + ) -> Iterable[Issue]: + assert self.jira_client, "Jira client not initialized." 
+ + if not ids_done: + new_ids, page_token = self._enhanced_search_ids(jql, next_page_token) + if checkpoint_callback is not None and new_ids: + checkpoint_callback( + self._chunk_issue_ids(new_ids, max_results), + page_token, + ) + elif checkpoint_callback is not None: + checkpoint_callback([], page_token) + + if all_issue_ids: + issue_ids = all_issue_ids.pop() + if issue_ids: + yield from self._bulk_fetch_issues(issue_ids, fields) + + def _perform_jql_search_v2( + self, + *, + jql: str, + start: int, + max_results: int, + fields: str | None = None, + ) -> Iterable[Issue]: + assert self.jira_client, "Jira client not initialized." + + issues = self.jira_client.search_issues( + jql_str=jql, + startAt=start, + maxResults=max_results, + fields=fields or self._fields_param, + expand="renderedFields", + ) + for issue in issues: + yield issue + + def _enhanced_search_ids( + self, + jql: str, + next_page_token: str | None, + ) -> tuple[list[str], str | None]: + assert self.jira_client, "Jira client not initialized." + enhanced_search_path = self.jira_client._get_url("search/jql") + params: dict[str, str | int | None] = { + "jql": jql, + "maxResults": _MAX_RESULTS_FETCH_IDS, + "nextPageToken": next_page_token, + "fields": "id", + } + response = self.jira_client._session.get(enhanced_search_path, params=params) + response.raise_for_status() + data = response.json() + return [str(issue["id"]) for issue in data.get("issues", [])], data.get("nextPageToken") + + def _bulk_fetch_issues( + self, + issue_ids: list[str], + fields: str | None, + ) -> Iterable[Issue]: + assert self.jira_client, "Jira client not initialized." 
+ if not issue_ids: + return [] + + bulk_fetch_path = self.jira_client._get_url("issue/bulkfetch") + payload: dict[str, Any] = {"issueIdsOrKeys": issue_ids} + payload["fields"] = fields.split(",") if fields else ["*all"] + + response = self.jira_client._session.post(bulk_fetch_path, json=payload) + response.raise_for_status() + data = response.json() + return [Issue(self.jira_client._options, self.jira_client._session, raw=issue) for issue in data.get("issues", [])] + + @staticmethod + def _chunk_issue_ids(issue_ids: list[str], chunk_size: int) -> Iterable[list[str]]: + if chunk_size <= 0: + chunk_size = _JIRA_FULL_PAGE_SIZE + + for idx in range(0, len(issue_ids), chunk_size): + yield issue_ids[idx : idx + chunk_size] + + def _make_checkpoint_callback(self, checkpoint: JiraCheckpoint) -> Callable[[Iterable[list[str]], str | None], None]: + def checkpoint_callback( + issue_ids: Iterable[list[str]] | list[list[str]], + page_token: str | None, + ) -> None: + for id_batch in issue_ids: + checkpoint.all_issue_ids.append(list(id_batch)) + checkpoint.cursor = page_token + checkpoint.ids_done = page_token is None + + return checkpoint_callback + + def _update_checkpoint_for_next_run( + self, + *, + checkpoint: JiraCheckpoint, + current_offset: int, + starting_offset: int, + page_size: int, + ) -> None: + if self._is_cloud_client(): + checkpoint.has_more = bool(checkpoint.all_issue_ids) or not checkpoint.ids_done + else: + checkpoint.has_more = current_offset - starting_offset == page_size + checkpoint.cursor = None + checkpoint.ids_done = True + checkpoint.all_issue_ids = [] + + +def iterate_jira_documents( + connector: "JiraConnector", + start: SecondsSinceUnixEpoch, + end: SecondsSinceUnixEpoch, + iteration_limit: int = 100_000, +) -> Iterator[Document]: + """Yield documents without materializing the entire result set.""" + + checkpoint = connector.build_dummy_checkpoint() + iterations = 0 + + while checkpoint.has_more: + wrapper = 
CheckpointOutputWrapper[JiraCheckpoint]() + generator = wrapper(connector.load_from_checkpoint(start=start, end=end, checkpoint=checkpoint)) + + for document, failure, next_checkpoint in generator: + if failure is not None: + failure_message = getattr(failure, "failure_message", str(failure)) + raise RuntimeError(f"Failed to load Jira documents: {failure_message}") + if document is not None: + yield document + if next_checkpoint is not None: + checkpoint = next_checkpoint + + iterations += 1 + if iterations > iteration_limit: + raise RuntimeError("Too many iterations while loading Jira documents.") + + +def test_jira( + *, + base_url: str, + project_key: str | None = None, + jql_query: str | None = None, + credentials: dict[str, Any], + batch_size: int = INDEX_BATCH_SIZE, + start_ts: float | None = None, + end_ts: float | None = None, + connector_options: dict[str, Any] | None = None, +) -> list[Document]: + """Programmatic entry point that mirrors the CLI workflow.""" + + connector_kwargs = connector_options.copy() if connector_options else {} + connector = JiraConnector( + jira_base_url=base_url, + project_key=project_key, + jql_query=jql_query, + batch_size=batch_size, + **connector_kwargs, + ) + connector.load_credentials(credentials) + connector.validate_connector_settings() + + now_ts = datetime.now(timezone.utc).timestamp() + start = start_ts if start_ts is not None else 0.0 + end = end_ts if end_ts is not None else now_ts + + documents = list(iterate_jira_documents(connector, start=start, end=end)) + logger.info(f"[Jira] Fetched {len(documents)} Jira documents.") + for doc in documents[:5]: + logger.info(f"[Jira] Document {doc.semantic_identifier} ({doc.id}) size={doc.size_bytes} bytes") + return documents + + +def _build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Fetch Jira issues and print summary statistics.") + parser.add_argument("--base-url", dest="base_url", default=os.environ.get("JIRA_BASE_URL")) + 
parser.add_argument("--project", dest="project_key", default=os.environ.get("JIRA_PROJECT_KEY")) + parser.add_argument("--jql", dest="jql_query", default=os.environ.get("JIRA_JQL")) + parser.add_argument("--email", dest="user_email", default=os.environ.get("JIRA_USER_EMAIL")) + parser.add_argument("--token", dest="api_token", default=os.environ.get("JIRA_API_TOKEN")) + parser.add_argument("--password", dest="password", default=os.environ.get("JIRA_PASSWORD")) + parser.add_argument("--batch-size", dest="batch_size", type=int, default=int(os.environ.get("JIRA_BATCH_SIZE", INDEX_BATCH_SIZE))) + parser.add_argument("--include_comments", dest="include_comments", type=bool, default=True) + parser.add_argument("--include_attachments", dest="include_attachments", type=bool, default=True) + parser.add_argument("--attachment_size_limit", dest="attachment_size_limit", type=float, default=_DEFAULT_ATTACHMENT_SIZE_LIMIT) + parser.add_argument("--start-ts", dest="start_ts", type=float, default=None, help="Epoch seconds inclusive lower bound for updated issues.") + parser.add_argument("--end-ts", dest="end_ts", type=float, default=9999999999, help="Epoch seconds inclusive upper bound for updated issues.") + return parser + + +def main(config: dict[str, Any] | None = None) -> None: + if config is None: + args = _build_arg_parser().parse_args() + config = { + "base_url": args.base_url, + "project_key": args.project_key, + "jql_query": args.jql_query, + "batch_size": args.batch_size, + "start_ts": args.start_ts, + "end_ts": args.end_ts, + "include_comments": args.include_comments, + "include_attachments": args.include_attachments, + "attachment_size_limit": args.attachment_size_limit, + "credentials": { + "jira_user_email": args.user_email, + "jira_api_token": args.api_token, + "jira_password": args.password, + }, + } + + base_url = config.get("base_url") + credentials = config.get("credentials", {}) + + print(f"[Jira] {config=}", flush=True) + print(f"[Jira] {credentials=}", 
flush=True) + + if not base_url: + raise RuntimeError("Jira base URL must be provided via config or CLI arguments.") + if not (credentials.get("jira_api_token") or (credentials.get("jira_user_email") and credentials.get("jira_password"))): + raise RuntimeError("Provide either an API token or both email/password for Jira authentication.") + + connector_options = { + key: value + for key, value in ( + ("include_comments", config.get("include_comments")), + ("include_attachments", config.get("include_attachments")), + ("attachment_size_limit", config.get("attachment_size_limit")), + ("labels_to_skip", config.get("labels_to_skip")), + ("comment_email_blacklist", config.get("comment_email_blacklist")), + ("scoped_token", config.get("scoped_token")), + ("timezone_offset", config.get("timezone_offset")), + ) + if value is not None + } + + documents = test_jira( + base_url=base_url, + project_key=config.get("project_key"), + jql_query=config.get("jql_query"), + credentials=credentials, + batch_size=config.get("batch_size", INDEX_BATCH_SIZE), + start_ts=config.get("start_ts"), + end_ts=config.get("end_ts"), + connector_options=connector_options, + ) + + preview_count = min(len(documents), 5) + for idx in range(preview_count): + doc = documents[idx] + print(f"[Jira] [Sample {idx + 1}] {doc.semantic_identifier} | id={doc.id} | size={doc.size_bytes} bytes") + + print(f"[Jira] Jira connector test completed. 
Documents fetched: {len(documents)}") + + +if __name__ == "__main__": # pragma: no cover - manual execution path + logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(name)s %(message)s") + main() diff --git a/common/data_source/jira/utils.py b/common/data_source/jira/utils.py new file mode 100644 index 000000000..62219d36d --- /dev/null +++ b/common/data_source/jira/utils.py @@ -0,0 +1,149 @@ +"""Helper utilities for the Jira connector.""" + +from __future__ import annotations + +import os +from collections.abc import Collection +from datetime import datetime, timezone +from typing import Any, Iterable + +from jira.resources import Issue + +from common.data_source.utils import datetime_from_string + +JIRA_SERVER_API_VERSION = os.environ.get("JIRA_SERVER_API_VERSION", "2") +JIRA_CLOUD_API_VERSION = os.environ.get("JIRA_CLOUD_API_VERSION", "3") + + +def build_issue_url(base_url: str, issue_key: str) -> str: + """Return the canonical UI URL for a Jira issue.""" + return f"{base_url.rstrip('/')}/browse/{issue_key}" + + +def parse_jira_datetime(value: Any) -> datetime | None: + """Best-effort parse of Jira datetime values to aware UTC datetimes.""" + if value is None: + return None + if isinstance(value, datetime): + return value.astimezone(timezone.utc) if value.tzinfo else value.replace(tzinfo=timezone.utc) + if isinstance(value, str): + return datetime_from_string(value) + return None + + +def extract_named_value(value: Any) -> str | None: + """Extract a readable string out of Jira's typed objects.""" + if value is None: + return None + if isinstance(value, str): + return value + if isinstance(value, dict): + return value.get("name") or value.get("value") + return getattr(value, "name", None) + + +def extract_user(value: Any) -> tuple[str | None, str | None]: + """Return display name + email tuple for a Jira user blob.""" + if value is None: + return None, None + if isinstance(value, dict): + return value.get("displayName"), 
value.get("emailAddress") + + display = getattr(value, "displayName", None) + email = getattr(value, "emailAddress", None) + return display, email + + +def extract_text_from_adf(adf: Any) -> str: + """Flatten Atlassian Document Format (ADF) structures to text.""" + texts: list[str] = [] + + def _walk(node: Any) -> None: + if node is None: + return + if isinstance(node, dict): + node_type = node.get("type") + if node_type == "text": + texts.append(node.get("text", "")) + for child in node.get("content", []): + _walk(child) + elif isinstance(node, list): + for child in node: + _walk(child) + + _walk(adf) + return "\n".join(part for part in texts if part) + + +def extract_body_text(value: Any) -> str: + """Normalize Jira description/comments (raw/adf/str) into plain text.""" + if value is None: + return "" + if isinstance(value, str): + return value.strip() + if isinstance(value, dict): + return extract_text_from_adf(value).strip() + return str(value).strip() + + +def format_comments( + comment_block: Any, + *, + blacklist: Collection[str], +) -> str: + """Convert Jira comments into a markdown-ish bullet list.""" + if not isinstance(comment_block, dict): + return "" + + comments = comment_block.get("comments") or [] + lines: list[str] = [] + normalized_blacklist = {email.lower() for email in blacklist if email} + + for comment in comments: + author = comment.get("author") or {} + author_email = (author.get("emailAddress") or "").lower() + if author_email and author_email in normalized_blacklist: + continue + + author_name = author.get("displayName") or author.get("name") or author_email or "Unknown" + created = parse_jira_datetime(comment.get("created")) + created_str = created.isoformat() if created else "Unknown time" + body = extract_body_text(comment.get("body")) + if not body: + continue + + lines.append(f"- {author_name} ({created_str}):\n{body}") + + return "\n\n".join(lines) + + +def format_attachments(attachments: Any) -> str: + """List Jira attachments as 
bullet points.""" + if not isinstance(attachments, list): + return "" + + attachment_lines: list[str] = [] + for attachment in attachments: + filename = attachment.get("filename") + if not filename: + continue + size = attachment.get("size") + size_text = f" ({size} bytes)" if isinstance(size, int) else "" + content_url = attachment.get("content") or "" + url_suffix = f" -> {content_url}" if content_url else "" + attachment_lines.append(f"- {filename}{size_text}{url_suffix}") + + return "\n".join(attachment_lines) + + +def should_skip_issue(issue: Issue, labels_to_skip: set[str]) -> bool: + """Return True if the issue contains any label from the skip list.""" + if not labels_to_skip: + return False + + fields = getattr(issue, "raw", {}).get("fields", {}) + labels: Iterable[str] = fields.get("labels") or [] + for label in labels: + if (label or "").lower() in labels_to_skip: + return True + return False diff --git a/common/data_source/jira_connector.py b/common/data_source/jira_connector.py deleted file mode 100644 index 4d6f1160e..000000000 --- a/common/data_source/jira_connector.py +++ /dev/null @@ -1,112 +0,0 @@ -"""Jira connector""" - -from typing import Any - -from jira import JIRA - -from common.data_source.config import INDEX_BATCH_SIZE -from common.data_source.exceptions import ( - ConnectorValidationError, - InsufficientPermissionsError, - UnexpectedValidationError, ConnectorMissingCredentialError -) -from common.data_source.interfaces import ( - CheckpointedConnectorWithPermSync, - SecondsSinceUnixEpoch, - SlimConnectorWithPermSync -) -from common.data_source.models import ( - ConnectorCheckpoint -) - - -class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync): - """Jira connector for accessing Jira issues and projects""" - - def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None: - self.batch_size = batch_size - self.jira_client: JIRA | None = None - - def load_credentials(self, credentials: dict[str, Any]) -> dict[str, 
Any] | None: - """Load Jira credentials""" - try: - url = credentials.get("url") - username = credentials.get("username") - password = credentials.get("password") - token = credentials.get("token") - - if not url: - raise ConnectorMissingCredentialError("Jira URL is required") - - if token: - # API token authentication - self.jira_client = JIRA(server=url, token_auth=token) - elif username and password: - # Basic authentication - self.jira_client = JIRA(server=url, basic_auth=(username, password)) - else: - raise ConnectorMissingCredentialError("Jira credentials are incomplete") - - return None - except Exception as e: - raise ConnectorMissingCredentialError(f"Jira: {e}") - - def validate_connector_settings(self) -> None: - """Validate Jira connector settings""" - if not self.jira_client: - raise ConnectorMissingCredentialError("Jira") - - try: - # Test connection by getting server info - self.jira_client.server_info() - except Exception as e: - if "401" in str(e) or "403" in str(e): - raise InsufficientPermissionsError("Invalid credentials or insufficient permissions") - elif "404" in str(e): - raise ConnectorValidationError("Jira instance not found") - else: - raise UnexpectedValidationError(f"Jira validation error: {e}") - - def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any: - """Poll Jira for recent issues""" - # Simplified implementation - in production this would handle actual polling - return [] - - def load_from_checkpoint( - self, - start: SecondsSinceUnixEpoch, - end: SecondsSinceUnixEpoch, - checkpoint: ConnectorCheckpoint, - ) -> Any: - """Load documents from checkpoint""" - # Simplified implementation - return [] - - def load_from_checkpoint_with_perm_sync( - self, - start: SecondsSinceUnixEpoch, - end: SecondsSinceUnixEpoch, - checkpoint: ConnectorCheckpoint, - ) -> Any: - """Load documents from checkpoint with permission sync""" - # Simplified implementation - return [] - - def build_dummy_checkpoint(self) -> 
ConnectorCheckpoint: - """Build dummy checkpoint""" - return ConnectorCheckpoint() - - def validate_checkpoint_json(self, checkpoint_json: str) -> ConnectorCheckpoint: - """Validate checkpoint JSON""" - # Simplified implementation - return ConnectorCheckpoint() - - def retrieve_all_slim_docs_perm_sync( - self, - start: SecondsSinceUnixEpoch | None = None, - end: SecondsSinceUnixEpoch | None = None, - callback: Any = None, - ) -> Any: - """Retrieve all simplified documents with permission sync""" - # Simplified implementation - return [] \ No newline at end of file diff --git a/common/data_source/utils.py b/common/data_source/utils.py index 7c2cdf898..b42c3833b 100644 --- a/common/data_source/utils.py +++ b/common/data_source/utils.py @@ -48,17 +48,35 @@ from common.data_source.exceptions import RateLimitTriedTooManyTimesError from common.data_source.interfaces import CT, CheckpointedConnector, CheckpointOutputWrapper, ConfluenceUser, LoadFunction, OnyxExtensionType, SecondsSinceUnixEpoch, TokenResponse from common.data_source.models import BasicExpertInfo, Document +_TZ_SUFFIX_PATTERN = re.compile(r"([+-])([\d:]+)$") + def datetime_from_string(datetime_string: str) -> datetime: datetime_string = datetime_string.strip() + match_jira_format = _TZ_SUFFIX_PATTERN.search(datetime_string) + if match_jira_format: + sign, tz_field = match_jira_format.groups() + digits = tz_field.replace(":", "") + + if digits.isdigit() and 1 <= len(digits) <= 4: + if len(digits) >= 3: + hours = digits[:-2].rjust(2, "0") + minutes = digits[-2:] + else: + hours = digits.rjust(2, "0") + minutes = "00" + + normalized = f"{sign}{hours}:{minutes}" + datetime_string = f"{datetime_string[: match_jira_format.start()]}{normalized}" + # Handle the case where the datetime string ends with 'Z' (Zulu time) - if datetime_string.endswith('Z'): - datetime_string = datetime_string[:-1] + '+00:00' + if datetime_string.endswith("Z"): + datetime_string = datetime_string[:-1] + "+00:00" # Handle timezone format 
"+0000" -> "+00:00" - if datetime_string.endswith('+0000'): - datetime_string = datetime_string[:-5] + '+00:00' + if datetime_string.endswith("+0000"): + datetime_string = datetime_string[:-5] + "+00:00" datetime_object = datetime.fromisoformat(datetime_string) @@ -480,7 +498,7 @@ def get_file_ext(file_name: str) -> str: def is_accepted_file_ext(file_ext: str, extension_type: OnyxExtensionType) -> bool: - image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'} + image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"} text_extensions = {".txt", ".md", ".mdx", ".conf", ".log", ".json", ".csv", ".tsv", ".xml", ".yml", ".yaml", ".sql"} document_extensions = {".pdf", ".docx", ".pptx", ".xlsx", ".eml", ".epub", ".html"} @@ -902,6 +920,18 @@ def load_all_docs_from_checkpoint_connector( ) +_ATLASSIAN_CLOUD_DOMAINS = (".atlassian.net", ".jira.com", ".jira-dev.com") + + +def is_atlassian_cloud_url(url: str) -> bool: + try: + host = urlparse(url).hostname or "" + except ValueError: + return False + host = host.lower() + return any(host.endswith(domain) for domain in _ATLASSIAN_CLOUD_DOMAINS) + + def get_cloudId(base_url: str) -> str: tenant_info_url = urljoin(base_url, "/_edge/tenant_info") response = requests.get(tenant_info_url, timeout=10) diff --git a/common/log_utils.py b/common/log_utils.py index e2110ebeb..abbcd286b 100644 --- a/common/log_utils.py +++ b/common/log_utils.py @@ -80,4 +80,4 @@ def log_exception(e, *args): raise Exception(a.text) else: logging.error(str(a)) - raise e \ No newline at end of file + raise e diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index 3dc9a7a3c..6925eb5f7 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -20,33 +20,40 @@ import copy +import faulthandler +import logging +import os +import signal import sys import threading import time import traceback +from datetime import datetime, timezone from typing import Any +import trio + from 
api.db.services.connector_service import ConnectorService, SyncLogsService from api.db.services.knowledgebase_service import KnowledgebaseService -from common.log_utils import init_root_logger -from common.config_utils import show_configs -from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector -import logging -import os -from datetime import datetime, timezone -import signal -import trio -import faulthandler -from common.constants import FileSource, TaskStatus from common import settings -from common.versions import get_ragflow_version +from common.config_utils import show_configs +from common.constants import FileSource, TaskStatus +from common.data_source import ( + BlobStorageConnector, + DiscordConnector, + GoogleDriveConnector, + JiraConnector, + NotionConnector, +) +from common.data_source.config import INDEX_BATCH_SIZE from common.data_source.confluence_connector import ConfluenceConnector from common.data_source.interfaces import CheckpointOutputWrapper from common.data_source.utils import load_all_docs_from_checkpoint_connector -from common.data_source.config import INDEX_BATCH_SIZE +from common.log_utils import init_root_logger from common.signal_utils import start_tracemalloc_and_snapshot, stop_tracemalloc +from common.versions import get_ragflow_version -MAX_CONCURRENT_TASKS = int(os.environ.get('MAX_CONCURRENT_TASKS', "5")) +MAX_CONCURRENT_TASKS = int(os.environ.get("MAX_CONCURRENT_TASKS", "5")) task_limiter = trio.Semaphore(MAX_CONCURRENT_TASKS) @@ -72,31 +79,32 @@ class SyncBase: min_update = min([doc.doc_updated_at for doc in document_batch]) max_update = max([doc.doc_updated_at for doc in document_batch]) next_update = max([next_update, max_update]) - docs = [{ - "id": doc.id, - "connector_id": task["connector_id"], - "source": self.SOURCE_NAME, - "semantic_identifier": doc.semantic_identifier, - "extension": doc.extension, - "size_bytes": doc.size_bytes, - "doc_updated_at": doc.doc_updated_at, 
- "blob": doc.blob - } for doc in document_batch] + docs = [ + { + "id": doc.id, + "connector_id": task["connector_id"], + "source": self.SOURCE_NAME, + "semantic_identifier": doc.semantic_identifier, + "extension": doc.extension, + "size_bytes": doc.size_bytes, + "doc_updated_at": doc.doc_updated_at, + "blob": doc.blob, + } + for doc in document_batch + ] e, kb = KnowledgebaseService.get_by_id(task["kb_id"]) err, dids = SyncLogsService.duplicate_and_parse(kb, docs, task["tenant_id"], f"{self.SOURCE_NAME}/{task['connector_id']}", task["auto_parse"]) SyncLogsService.increase_docs(task["id"], min_update, max_update, len(docs), "\n".join(err), len(err)) doc_num += len(docs) - logging.info("{} docs synchronized till {}".format(doc_num, next_update)) + prefix = "[Jira] " if self.SOURCE_NAME == FileSource.JIRA else "" + logging.info(f"{prefix}{doc_num} docs synchronized till {next_update}") SyncLogsService.done(task["id"], task["connector_id"]) task["poll_range_start"] = next_update except Exception as ex: - msg = '\n'.join([ - ''.join(traceback.format_exception_only(None, ex)).strip(), - ''.join(traceback.format_exception(None, ex, ex.__traceback__)).strip() - ]) + msg = "\n".join(["".join(traceback.format_exception_only(None, ex)).strip(), "".join(traceback.format_exception(None, ex, ex.__traceback__)).strip()]) SyncLogsService.update_by_id(task["id"], {"status": TaskStatus.FAIL, "full_exception_trace": msg, "error_msg": str(ex)}) SyncLogsService.schedule(task["connector_id"], task["kb_id"], task["poll_range_start"]) @@ -109,21 +117,16 @@ class S3(SyncBase): SOURCE_NAME: str = FileSource.S3 async def _generate(self, task: dict): - self.connector = BlobStorageConnector( - bucket_type=self.conf.get("bucket_type", "s3"), - bucket_name=self.conf["bucket_name"], - prefix=self.conf.get("prefix", "") - ) + self.connector = BlobStorageConnector(bucket_type=self.conf.get("bucket_type", "s3"), bucket_name=self.conf["bucket_name"], prefix=self.conf.get("prefix", "")) 
self.connector.load_credentials(self.conf["credentials"]) - document_batch_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \ - else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp()) + document_batch_generator = ( + self.connector.load_from_state() + if task["reindex"] == "1" or not task["poll_range_start"] + else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp()) + ) - begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"]) - logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"), - self.conf["bucket_name"], - self.conf.get("prefix", ""), - begin_info - )) + begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"]) + logging.info("Connect to {}: {}(prefix/{}) {}".format(self.conf.get("bucket_type", "s3"), self.conf["bucket_name"], self.conf.get("prefix", ""), begin_info)) return document_batch_generator @@ -131,8 +134,8 @@ class Confluence(SyncBase): SOURCE_NAME: str = FileSource.CONFLUENCE async def _generate(self, task: dict): - from common.data_source.interfaces import StaticCredentialsProvider from common.data_source.config import DocumentSource + from common.data_source.interfaces import StaticCredentialsProvider self.connector = ConfluenceConnector( wiki_base=self.conf["wiki_base"], @@ -141,11 +144,7 @@ class Confluence(SyncBase): # page_id=self.conf.get("page_id", ""), ) - credentials_provider = StaticCredentialsProvider( - tenant_id=task["tenant_id"], - connector_name=DocumentSource.CONFLUENCE, - credential_json=self.conf["credentials"] - ) + credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"], connector_name=DocumentSource.CONFLUENCE, credential_json=self.conf["credentials"]) 
self.connector.set_credentials_provider(credentials_provider) # Determine the time range for synchronization based on reindex or poll_range_start @@ -174,10 +173,13 @@ class Notion(SyncBase): async def _generate(self, task: dict): self.connector = NotionConnector(root_page_id=self.conf["root_page_id"]) self.connector.load_credentials(self.conf["credentials"]) - document_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \ - else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp()) + document_generator = ( + self.connector.load_from_state() + if task["reindex"] == "1" or not task["poll_range_start"] + else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp()) + ) - begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"]) + begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"]) logging.info("Connect to Notion: root({}) {}".format(self.conf["root_page_id"], begin_info)) return document_generator @@ -194,13 +196,16 @@ class Discord(SyncBase): server_ids=server_ids.split(",") if server_ids else [], channel_names=channel_names.split(",") if channel_names else [], start_date=datetime(1970, 1, 1, tzinfo=timezone.utc).strftime("%Y-%m-%d"), - batch_size=self.conf.get("batch_size", 1024) + batch_size=self.conf.get("batch_size", 1024), ) self.connector.load_credentials(self.conf["credentials"]) - document_generator = self.connector.load_from_state() if task["reindex"]=="1" or not task["poll_range_start"] \ - else self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp()) + document_generator = ( + self.connector.load_from_state() + if task["reindex"] == "1" or not task["poll_range_start"] + else 
self.connector.poll_source(task["poll_range_start"].timestamp(), datetime.now(timezone.utc).timestamp()) + ) - begin_info = "totally" if task["reindex"]=="1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"]) + begin_info = "totally" if task["reindex"] == "1" or not task["poll_range_start"] else "from {}".format(task["poll_range_start"]) logging.info("Connect to Discord: servers({}), channel({}) {}".format(server_ids, channel_names, begin_info)) return document_generator @@ -285,7 +290,7 @@ class GoogleDrive(SyncBase): admin_email = self.connector.primary_admin_email except RuntimeError: admin_email = "unknown" - logging.info("Connect to Google Drive as %s %s", admin_email, begin_info) + logging.info(f"Connect to Google Drive as {admin_email} {begin_info}") return document_batches() def _persist_rotated_credentials(self, connector_id: str, credentials: dict[str, Any]) -> None: @@ -303,7 +308,93 @@ class Jira(SyncBase): SOURCE_NAME: str = FileSource.JIRA async def _generate(self, task: dict): - pass + connector_kwargs = { + "jira_base_url": self.conf["base_url"], + "project_key": self.conf.get("project_key"), + "jql_query": self.conf.get("jql_query"), + "batch_size": self.conf.get("batch_size", INDEX_BATCH_SIZE), + "include_comments": self.conf.get("include_comments", True), + "include_attachments": self.conf.get("include_attachments", False), + "labels_to_skip": self._normalize_list(self.conf.get("labels_to_skip")), + "comment_email_blacklist": self._normalize_list(self.conf.get("comment_email_blacklist")), + "scoped_token": self.conf.get("scoped_token", False), + "attachment_size_limit": self.conf.get("attachment_size_limit"), + "timezone_offset": self.conf.get("timezone_offset"), + } + + self.connector = JiraConnector(**connector_kwargs) + + credentials = self.conf.get("credentials") + if not credentials: + raise ValueError("Jira connector is missing credentials.") + + self.connector.load_credentials(credentials) + 
self.connector.validate_connector_settings() + + if task["reindex"] == "1" or not task["poll_range_start"]: + start_time = 0.0 + begin_info = "totally" + else: + start_time = task["poll_range_start"].timestamp() + begin_info = f"from {task['poll_range_start']}" + + end_time = datetime.now(timezone.utc).timestamp() + + raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE + try: + batch_size = int(raw_batch_size) + except (TypeError, ValueError): + batch_size = INDEX_BATCH_SIZE + if batch_size <= 0: + batch_size = INDEX_BATCH_SIZE + + def document_batches(): + checkpoint = self.connector.build_dummy_checkpoint() + pending_docs = [] + iterations = 0 + iteration_limit = 100_000 + + while checkpoint.has_more: + wrapper = CheckpointOutputWrapper() + generator = wrapper( + self.connector.load_from_checkpoint( + start_time, + end_time, + checkpoint, + ) + ) + for document, failure, next_checkpoint in generator: + if failure is not None: + logging.warning( + f"[Jira] Jira connector failure: {getattr(failure, 'failure_message', failure)}" + ) + continue + if document is not None: + pending_docs.append(document) + if len(pending_docs) >= batch_size: + yield pending_docs + pending_docs = [] + if next_checkpoint is not None: + checkpoint = next_checkpoint + + iterations += 1 + if iterations > iteration_limit: + logging.error(f"[Jira] Task {task.get('id')} exceeded iteration limit ({iteration_limit}).") + raise RuntimeError("Too many iterations while loading Jira documents.") + + if pending_docs: + yield pending_docs + + logging.info(f"[Jira] Connect to Jira {connector_kwargs['jira_base_url']} {begin_info}") + return document_batches() + + @staticmethod + def _normalize_list(values: Any) -> list[str] | None: + if values is None: + return None + if isinstance(values, str): + values = [item.strip() for item in values.split(",")] + return [str(value).strip() for value in values if value is not None and str(value).strip()] class 
SharePoint(SyncBase): @@ -337,9 +428,10 @@ func_factory = { FileSource.JIRA: Jira, FileSource.SHAREPOINT: SharePoint, FileSource.SLACK: Slack, - FileSource.TEAMS: Teams + FileSource.TEAMS: Teams, } + async def dispatch_tasks(): async with trio.open_nursery() as nursery: while True: @@ -385,7 +477,7 @@ async def main(): __/ | |___/ """) - logging.info(f'RAGFlow version: {get_ragflow_version()}') + logging.info(f"RAGFlow version: {get_ragflow_version()}") show_configs() settings.init_settings() if sys.platform != "win32": diff --git a/web/src/assets/svg/data-source/jira.svg b/web/src/assets/svg/data-source/jira.svg new file mode 100644 index 000000000..8f9cd8b97 --- /dev/null +++ b/web/src/assets/svg/data-source/jira.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index e2035a378..350a64db8 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -732,6 +732,33 @@ Example: general/v2/`, 'Comma-separated emails whose “My Drive” contents should be indexed (include the primary admin).', google_driveSharedFoldersTip: 'Comma-separated Google Drive folder links to crawl.', + jiraDescription: + 'Connect your Jira workspace to sync issues, comments, and attachments.', + jiraBaseUrlTip: + 'Base URL of your Jira site (e.g., https://your-domain.atlassian.net).', + jiraProjectKeyTip: + 'Optional: limit syncing to a single project key (e.g., ENG).', + jiraJqlTip: + 'Optional JQL filter. 
Leave blank to rely on project/time filters.', + jiraBatchSizeTip: + 'Maximum number of issues requested from Jira per batch.', + jiraCommentsTip: + 'Include Jira comments in the generated markdown document.', + jiraAttachmentsTip: + 'Download attachments as separate documents during sync.', + jiraAttachmentSizeTip: + 'Attachments larger than this number of bytes will be skipped.', + jiraLabelsTip: + 'Labels that should be skipped while indexing (comma separated).', + jiraBlacklistTip: + 'Comments whose author email matches these entries will be ignored.', + jiraScopedTokenTip: + 'Enable this when using scoped Atlassian tokens (api.atlassian.com).', + jiraEmailTip: 'Email associated with the Jira account/API token.', + jiraTokenTip: + 'API token generated from https://id.atlassian.com/manage-profile/security/api-tokens.', + jiraPasswordTip: + 'Optional password for Jira Server/Data Center environments.', availableSourcesDescription: 'Select a data source to add', availableSources: 'Available sources', datasourceDescription: 'Manage your data source and connections', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 301719117..b6d25dc1f 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -716,6 +716,23 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 '需要索引其 “我的云端硬盘” 的邮箱,多个邮箱用逗号分隔(建议包含管理员)。', google_driveSharedFoldersTip: '需要同步的 Google Drive 文件夹链接,多个链接用逗号分隔。', + jiraDescription: '接入 Jira 工作区,持续同步Issues、评论与附件。', + jiraBaseUrlTip: + 'Jira 的 Base URL,例如:https://your-domain.atlassian.net。', + jiraProjectKeyTip: '可选:仅同步指定的项目(如 RAG)。', + jiraJqlTip: '可选:自定义 JQL 过滤条件,留空则使用项目 / 时间范围。', + jiraBatchSizeTip: '每次向 Jira 请求的 Issue 数量上限。', + jiraCommentsTip: '同步评论。', + jiraAttachmentsTip: '开启后会将附件下载为独立文档。', + jiraAttachmentSizeTip: '超过该字节阈值的附件会被跳过。', + jiraLabelsTip: '需要跳过的标签(逗号分隔)。', + jiraBlacklistTip: '这些邮箱作者的评论会被忽略。', + jiraScopedTokenTip: + '仅当凭证为 Atlassian scoped token(api.atlassian.com)时生效。', + jiraEmailTip: '与 API Token 对应的 Jira 账户邮箱。', + 
jiraTokenTip: + '在 https://id.atlassian.com/manage-profile/security/api-tokens 生成的 API Token。 (Clould only)', + jiraPasswordTip: '可选:仅 Jira Server/Data Center 环境需要的密码字段。', availableSourcesDescription: '选择要添加的数据源', availableSources: '可用数据源', datasourceDescription: '管理您的数据源和连接', diff --git a/web/src/pages/user-setting/data-source/contant.tsx b/web/src/pages/user-setting/data-source/contant.tsx index 7acf3036d..3c8c55826 100644 --- a/web/src/pages/user-setting/data-source/contant.tsx +++ b/web/src/pages/user-setting/data-source/contant.tsx @@ -9,8 +9,8 @@ export enum DataSourceKey { NOTION = 'notion', DISCORD = 'discord', GOOGLE_DRIVE = 'google_drive', - // GMAIL = 'gmail', - // JIRA = 'jira', + // GMAIL = 'gmail', + JIRA = 'jira', // SHAREPOINT = 'sharepoint', // SLACK = 'slack', // TEAMS = 'teams', @@ -42,6 +42,11 @@ export const DataSourceInfo = { description: t(`setting.${DataSourceKey.GOOGLE_DRIVE}Description`), icon: , }, + [DataSourceKey.JIRA]: { + name: 'Jira', + description: t(`setting.${DataSourceKey.JIRA}Description`), + icon: , + }, }; export const DataSourceFormBaseFields = [ @@ -270,6 +275,106 @@ export const DataSourceFormFields = { defaultValue: 'uploaded', }, ], + [DataSourceKey.JIRA]: [ + { + label: 'Jira Base URL', + name: 'config.base_url', + type: FormFieldType.Text, + required: true, + placeholder: 'https://your-domain.atlassian.net', + tooltip: t('setting.jiraBaseUrlTip'), + }, + { + label: 'Project Key', + name: 'config.project_key', + type: FormFieldType.Text, + required: false, + placeholder: 'RAGFlow', + tooltip: t('setting.jiraProjectKeyTip'), + }, + { + label: 'Custom JQL', + name: 'config.jql_query', + type: FormFieldType.Textarea, + required: false, + placeholder: 'project = RAG AND updated >= -7d', + tooltip: t('setting.jiraJqlTip'), + }, + { + label: 'Batch Size', + name: 'config.batch_size', + type: FormFieldType.Number, + required: false, + tooltip: t('setting.jiraBatchSizeTip'), + }, + { + label: 'Include Comments', + name: 
'config.include_comments', + type: FormFieldType.Checkbox, + required: false, + defaultValue: true, + tooltip: t('setting.jiraCommentsTip'), + }, + { + label: 'Include Attachments', + name: 'config.include_attachments', + type: FormFieldType.Checkbox, + required: false, + defaultValue: false, + tooltip: t('setting.jiraAttachmentsTip'), + }, + { + label: 'Attachment Size Limit (bytes)', + name: 'config.attachment_size_limit', + type: FormFieldType.Number, + required: false, + defaultValue: 10 * 1024 * 1024, + tooltip: t('setting.jiraAttachmentSizeTip'), + }, + { + label: 'Labels to Skip', + name: 'config.labels_to_skip', + type: FormFieldType.Tag, + required: false, + tooltip: t('setting.jiraLabelsTip'), + }, + { + label: 'Comment Email Blacklist', + name: 'config.comment_email_blacklist', + type: FormFieldType.Tag, + required: false, + tooltip: t('setting.jiraBlacklistTip'), + }, + { + label: 'Use Scoped Token (Clould only)', + name: 'config.scoped_token', + type: FormFieldType.Checkbox, + required: false, + tooltip: t('setting.jiraScopedTokenTip'), + }, + { + label: 'Jira User Email (Cloud) or User Name (Server)', + name: 'config.credentials.jira_user_email', + type: FormFieldType.Text, + required: true, + placeholder: 'you@example.com', + tooltip: t('setting.jiraEmailTip'), + }, + { + label: 'Jira API Token (Cloud only)', + name: 'config.credentials.jira_api_token', + type: FormFieldType.Password, + required: false, + tooltip: t('setting.jiraTokenTip'), + }, + { + label: 'Jira Password (Server only)', + name: 'config.credentials.jira_password', + type: FormFieldType.Password, + required: false, + tooltip: t('setting.jiraPasswordTip'), + }, + ], // [DataSourceKey.GOOGLE_DRIVE]: [ // { // label: 'Primary Admin Email', @@ -433,4 +538,25 @@ export const DataSourceFormDefaultValues = { }, }, }, + [DataSourceKey.JIRA]: { + name: '', + source: DataSourceKey.JIRA, + config: { + base_url: '', + project_key: '', + jql_query: '', + batch_size: 2, + include_comments: true, + 
include_attachments: false, + attachment_size_limit: 10 * 1024 * 1024, + labels_to_skip: [], + comment_email_blacklist: [], + scoped_token: false, + credentials: { + jira_user_email: '', + jira_api_token: '', + jira_password: '', + }, + }, + }, }; diff --git a/web/src/pages/user-setting/data-source/index.tsx b/web/src/pages/user-setting/data-source/index.tsx index 80ceea1d7..9cb58672a 100644 --- a/web/src/pages/user-setting/data-source/index.tsx +++ b/web/src/pages/user-setting/data-source/index.tsx @@ -44,6 +44,12 @@ const dataSourceTemplates = [ description: DataSourceInfo[DataSourceKey.NOTION].description, icon: DataSourceInfo[DataSourceKey.NOTION].icon, }, + { + id: DataSourceKey.JIRA, + name: DataSourceInfo[DataSourceKey.JIRA].name, + description: DataSourceInfo[DataSourceKey.JIRA].description, + icon: DataSourceInfo[DataSourceKey.JIRA].icon, + }, ]; const DataSource = () => { const { t } = useTranslation(); From e7e89d3ecbf9638865b15f951549875534b62538 Mon Sep 17 00:00:00 2001 From: Billy Bao Date: Mon, 17 Nov 2025 11:16:34 +0800 Subject: [PATCH 14/15] Doc: style fix (#11295) ### What problem does this PR solve? 
Style fix based on #11283 ### Type of change - [x] Documentation Update --- deepdoc/parser/mineru_parser.py | 2 +- docs/guides/accessing_admin_ui.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py index bb663de0d..6d3b292d0 100644 --- a/deepdoc/parser/mineru_parser.py +++ b/deepdoc/parser/mineru_parser.py @@ -434,7 +434,7 @@ class MinerUParser(RAGFlowPdfParser): if not section.strip(): section = "FAILED TO PARSE TABLE" case MinerUContentType.IMAGE: - section = "".join(output.get(["image_caption"],[])) + "\n" + "".join(output.get(["image_footnote"],[])) + section = "".join(output.get("image_caption", [])) + "\n" + "".join(output.get("image_footnote", [])) case MinerUContentType.EQUATION: section = output["text"] case MinerUContentType.CODE: diff --git a/docs/guides/accessing_admin_ui.md b/docs/guides/accessing_admin_ui.md index 23521244b..181cff5ac 100644 --- a/docs/guides/accessing_admin_ui.md +++ b/docs/guides/accessing_admin_ui.md @@ -15,7 +15,7 @@ To access the RAGFlow admin UI, append `/admin` to the web UI's address, e.g. `h ### Default Credentials | Username | Password | |----------|----------| -| admin@ragflow.io | admin | +| `admin@ragflow.io` | `admin` | ## Admin UI Overview From 9cef3a26250667fa6761dbe9893a194de0c28aef Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Mon, 17 Nov 2025 11:16:55 +0800 Subject: [PATCH 15/15] Fix: Fixed the issue of not being able to select the time zone in the user center. (#11298) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … user center. ### What problem does this PR solve? Fix: Fixed the issue of not being able to select the time zone in the user center. 
### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/components/ui/modal/modal.tsx | 4 +++ web/src/pages/user-setting/profile/index.tsx | 30 ++++++-------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/web/src/components/ui/modal/modal.tsx b/web/src/components/ui/modal/modal.tsx index acae6c147..af516b1e6 100644 --- a/web/src/components/ui/modal/modal.tsx +++ b/web/src/components/ui/modal/modal.tsx @@ -86,6 +86,9 @@ const Modal: ModalType = ({ onOk?.(); }, [onOk, onOpenChange]); const handleChange = (open: boolean) => { + if (!open && !maskClosable) { + return; + } onOpenChange?.(open); console.log('open', open, onOpenChange); if (open && !disabled) { @@ -185,6 +188,7 @@ const Modal: ModalType = ({ diff --git a/web/src/pages/user-setting/profile/index.tsx b/web/src/pages/user-setting/profile/index.tsx index dceb2cdf3..5c2741cf6 100644 --- a/web/src/pages/user-setting/profile/index.tsx +++ b/web/src/pages/user-setting/profile/index.tsx @@ -13,13 +13,7 @@ import { } from '@/components/ui/form'; import { Input } from '@/components/ui/input'; import { Modal } from '@/components/ui/modal/modal'; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from '@/components/ui/select'; +import { RAGFlowSelect } from '@/components/ui/select'; import { useTranslate } from '@/hooks/common-hooks'; import { TimezoneList } from '@/pages/user-setting/constants'; import { zodResolver } from '@hookform/resolvers/zod'; @@ -230,6 +224,7 @@ const ProfilePage: FC = () => { title={modalTitle[editType]} open={isEditing} showfooter={false} + maskClosable={false} titleClassName="text-base" onOpenChange={(open) => { if (!open) { @@ -281,23 +276,14 @@ const ProfilePage: FC = () => { {t('timezone')} - + />