Merge branch 'main' of github.com:infiniflow/ragflow into feature/1111

This commit is contained in:
chanx 2025-11-14 17:48:38 +08:00
commit ea2fc2046a
25 changed files with 441 additions and 47 deletions

View file

@ -96,7 +96,7 @@ jobs:
args: "check"
- name: Check comments of changed Python files
if: ${{ !cancelled() && !failure() }}
if: ${{ false }}
run: |
if [[ ${{ github.event_name }} == 'pull_request_target' ]]; then
CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} \
@ -110,7 +110,7 @@ jobs:
for file in "${files[@]}"; do
if [ -f "$file" ]; then
if python3 check_comment_ascii.py $file"; then
if python3 check_comment_ascii.py "$file"; then
echo "✅ $file"
else
echo "❌ $file"

View file

@ -4,7 +4,7 @@
Admin Service is a dedicated management component designed to monitor, maintain, and administrate the RAGFlow system. It provides comprehensive tools for ensuring system stability, performing operational tasks, and managing users and permissions efficiently.
The service offers real-time monitoring of critical components, including the RAGFlow server, Task Executor processes, and dependent services such as MySQL, Elasticsearch, Redis, and MinIO. It automatically checks their health status, resource usage, and uptime, and performs restarts in case of failures to minimize downtime.
The service offers real-time monitoring of critical components, including the RAGFlow server, Task Executor processes, and dependent services such as MySQL, Infinity, Elasticsearch, Redis, and MinIO. It automatically checks their health status, resource usage, and uptime, and performs restarts in case of failures to minimize downtime.
For user and system management, it supports listing, creating, modifying, and deleting users and their associated resources like knowledge bases and Agents.

View file

@ -393,7 +393,9 @@ class AdminCLI(Cmd):
print(f"Can't access {self.host}, port: {self.port}")
def _format_service_detail_table(self, data):
if not any([isinstance(v, list) for v in data.values()]):
if isinstance(data, list):
return data
if not all([isinstance(v, list) for v in data.values()]):
# normal table
return data
# handle task_executor heartbeats map, for example {'name': [{'done': 2, 'now': timestamp1}, {'done': 3, 'now': timestamp2}]
@ -404,7 +406,7 @@ class AdminCLI(Cmd):
task_executor_list.append({
"task_executor_name": k,
**heartbeats[0],
})
} if heartbeats else {"task_executor_name": k})
return task_executor_list
def _print_table_simple(self, data):
@ -415,7 +417,8 @@ class AdminCLI(Cmd):
# handle single row data
data = [data]
columns = list(data[0].keys())
columns = list(set().union(*(d.keys() for d in data)))
columns.sort()
col_widths = {}
def get_string_width(text):

View file

@ -169,7 +169,7 @@ def login_verify(f):
username = auth.parameters['username']
password = auth.parameters['password']
try:
if check_admin(username, password) is False:
if not check_admin(username, password):
return jsonify({
"code": 500,
"message": "Access denied",

View file

@ -25,8 +25,21 @@ from common.config_utils import read_config
from urllib.parse import urlparse
class BaseConfig(BaseModel):
id: int
name: str
host: str
port: int
service_type: str
detail_func_name: str
def to_dict(self) -> dict[str, Any]:
return {'id': self.id, 'name': self.name, 'host': self.host, 'port': self.port,
'service_type': self.service_type}
class ServiceConfigs:
configs = dict
configs = list[BaseConfig]
def __init__(self):
self.configs = []
@ -45,19 +58,6 @@ class ServiceType(Enum):
FILE_STORE = "file_store"
class BaseConfig(BaseModel):
id: int
name: str
host: str
port: int
service_type: str
detail_func_name: str
def to_dict(self) -> dict[str, Any]:
return {'id': self.id, 'name': self.name, 'host': self.host, 'port': self.port,
'service_type': self.service_type}
class MetaConfig(BaseConfig):
meta_type: str
@ -227,7 +227,7 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
ragflow_count = 0
id_count = 0
for k, v in raw_configs.items():
match (k):
match k:
case "ragflow":
name: str = f'ragflow_{ragflow_count}'
host: str = v['host']

View file

@ -13,8 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re
from werkzeug.security import check_password_hash
from common.constants import ActiveEnum
@ -190,7 +189,8 @@ class ServiceMgr:
config_dict['status'] = service_detail['status']
else:
config_dict['status'] = 'timeout'
except Exception:
except Exception as e:
logging.warning(f"Can't get service details, error: {e}")
config_dict['status'] = 'timeout'
if not config_dict['host']:
config_dict['host'] = '-'
@ -205,17 +205,13 @@ class ServiceMgr:
@staticmethod
def get_service_details(service_id: int):
service_id = int(service_id)
service_idx = int(service_id)
configs = SERVICE_CONFIGS.configs
service_config_mapping = {
c.id: {
'name': c.name,
'detail_func_name': c.detail_func_name
} for c in configs
}
service_info = service_config_mapping.get(service_id, {})
if not service_info:
raise AdminException(f"invalid service_id: {service_id}")
if service_idx < 0 or service_idx >= len(configs):
raise AdminException(f"invalid service_index: {service_idx}")
service_config = configs[service_idx]
service_info = {'name': service_config.name, 'detail_func_name': service_config.detail_func_name}
detail_func = getattr(health_utils, service_info.get('detail_func_name'))
res = detail_func()

View file

@ -0,0 +1,149 @@
from abc import ABC
import os
from agent.component.base import ComponentBase, ComponentParamBase
from api.utils.api_utils import timeout
class ListOperationsParam(ComponentParamBase):
"""
Define the List Operations component parameters.
"""
def __init__(self):
super().__init__()
self.query = ""
self.operations = "topN"
self.n=0
self.sort_method = "asc"
self.filter = {
"operator": "=",
"value": ""
}
self.outputs = {
"result": {
"value": [],
"type": "Array of ?"
},
"first": {
"value": "",
"type": "?"
},
"last": {
"value": "",
"type": "?"
}
}
def check(self):
self.check_empty(self.query, "query")
self.check_valid_value(self.operations, "Support operations", ["topN","head","tail","filter","sort","drop_duplicates"])
def get_input_form(self) -> dict[str, dict]:
return {}
class ListOperations(ComponentBase,ABC):
component_name = "ListOperations"
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
def _invoke(self, **kwargs):
self.input_objects=[]
inputs = getattr(self._param, "query", None)
self.inputs=self._canvas.get_variable_value(inputs)
self.set_input_value(inputs, self.inputs)
if self._param.operations == "topN":
self._topN()
elif self._param.operations == "head":
self._head()
elif self._param.operations == "tail":
self._tail()
elif self._param.operations == "filter":
self._filter()
elif self._param.operations == "sort":
self._sort()
elif self._param.operations == "drop_duplicates":
self._drop_duplicates()
def _coerce_n(self):
try:
return int(getattr(self._param, "n", 0))
except Exception:
return 0
def _set_outputs(self, outputs):
self._param.outputs["result"]["value"] = outputs
self._param.outputs["first"]["value"] = outputs[0] if outputs else None
self._param.outputs["last"]["value"] = outputs[-1] if outputs else None
def _topN(self):
n = self._coerce_n()
if n < 1:
outputs = []
else:
n = min(n, len(self.inputs))
outputs = self.inputs[:n]
self._set_outputs(outputs)
def _head(self):
n = self._coerce_n()
if 1 <= n <= len(self.inputs):
outputs = [self.inputs[n - 1]]
else:
outputs = []
self._set_outputs(outputs)
def _tail(self):
n = self._coerce_n()
if 1 <= n <= len(self.inputs):
outputs = [self.inputs[-n]]
else:
outputs = []
self._set_outputs(outputs)
def _filter(self):
self._set_outputs([i for i in self.inputs if self._eval(self._norm(i),self._param.filter["operator"],self._param.filter["value"])])
def _norm(self,v):
s = "" if v is None else str(v)
return s
def _eval(self, v, operator, value):
if operator == "=":
return v == value
elif operator == "":
return v != value
elif operator == "contains":
return value in v
elif operator == "start with":
return v.startswith(value)
elif operator == "end with":
return v.endswith(value)
else:
return False
def _sort(self):
if self._param.sort_method == "asc":
self._set_outputs(sorted(self.inputs))
elif self._param.sort_method == "desc":
self._set_outputs(sorted(self.inputs, reverse=True))
def _drop_duplicates(self):
seen = set()
outs = []
for item in self.inputs:
k = self._hashable(item)
if k in seen:
continue
seen.add(k)
outs.append(item)
self._set_outputs(outs)
def _hashable(self,x):
if isinstance(x, dict):
return tuple(sorted((k, self._hashable(v)) for k, v in x.items()))
if isinstance(x, (list, tuple)):
return tuple(self._hashable(v) for v in x)
if isinstance(x, set):
return tuple(sorted(self._hashable(v) for v in x))
return x
def thoughts(self) -> str:
return "ListOperation in progress"

View file

@ -83,10 +83,10 @@
"value": []
}
},
"password": "20010812Yy!",
"password": "",
"port": 3306,
"sql": "{Agent:WickedGoatsDivide@content}",
"username": "13637682833@163.com"
"username": ""
}
},
"upstream": [
@ -527,10 +527,10 @@
"value": []
}
},
"password": "20010812Yy!",
"password": "",
"port": 3306,
"sql": "{Agent:WickedGoatsDivide@content}",
"username": "13637682833@163.com"
"username": ""
},
"label": "ExeSQL",
"name": "ExeSQL"

View file

@ -173,7 +173,8 @@ def check_task_executor_alive():
heartbeats = [json.loads(heartbeat) for heartbeat in heartbeats]
task_executor_heartbeats[task_executor_id] = heartbeats
if task_executor_heartbeats:
return {"status": "alive", "message": task_executor_heartbeats}
status = "alive" if any(task_executor_heartbeats.values()) else "timeout"
return {"status": status, "message": task_executor_heartbeats}
else:
return {"status": "timeout", "message": "Not found any task executor."}
except Exception as e:

View file

@ -1,16 +1,28 @@
#!/usr/bin/env python3
"""
Check whether given python files contain non-ASCII comments.
How to check the whole git repo:
```
$ git ls-files -z -- '*.py' | xargs -0 python3 check_comment_ascii.py
```
"""
import sys
import tokenize
import ast
import pathlib
import re
ASCII = re.compile(r"^[ -~]*\Z") # Only printable ASCII
ASCII = re.compile(r"^[\n -~]*\Z") # Printable ASCII + newline
def check(src: str, name: str) -> int:
"""
I'm a docstring
docstring line 1
docstring line 2
"""
ok = 1
# A common comment begins with `#`

View file

@ -434,7 +434,7 @@ class MinerUParser(RAGFlowPdfParser):
if not section.strip():
section = "FAILED TO PARSE TABLE"
case MinerUContentType.IMAGE:
section = "".join(output["image_caption"]) + "\n" + "".join(output["image_footnote"])
section = "".join(output.get(["image_caption"],[])) + "\n" + "".join(output.get(["image_footnote"],[]))
case MinerUContentType.EQUATION:
section = output["text"]
case MinerUContentType.CODE:

View file

@ -347,7 +347,7 @@ class Dealer:
## For rank feature(tag_fea) scores.
rank_fea = self._rank_feature_scores(rank_feature, sres)
return tkweight * (np.array(tksim)+rank_fea) + vtweight * vtsim, tksim, vtsim
return tkweight * np.array(tksim) + vtweight * vtsim + rank_fea, tksim, vtsim
def hybrid_similarity(self, ans_embd, ins_embd, ans, inst):
return self.qryr.hybrid_similarity(ans_embd,

View file

@ -110,7 +110,7 @@ class RedisDB:
info = self.REDIS.info()
return {
'redis_version': info["redis_version"],
'server_mode': info["server_mode"],
'server_mode': info["server_mode"] if "server_mode" in info else info.get("redis_mode", ""),
'used_memory': info["used_memory_human"],
'total_system_memory': info["total_system_memory_human"],
'mem_fragmentation_ratio': info["mem_fragmentation_ratio"],

View file

@ -109,6 +109,7 @@ export enum Operator {
SearXNG = 'SearXNG',
Placeholder = 'Placeholder',
DataOperations = 'DataOperations',
ListOperations = 'ListOperations',
VariableAssigner = 'VariableAssigner',
VariableAggregator = 'VariableAggregator',
File = 'File', // pipeline

View file

@ -1593,6 +1593,8 @@ This delimiter is used to split the input text into several text pieces echo of
codeDescription: 'It allows developers to write custom Python logic.',
dataOperations: 'Data operations',
dataOperationsDescription: 'Perform various operations on a Data object.',
listOperations: 'List operations',
listOperationsDescription: 'Perform operations on a list.',
variableAssigner: 'Variable assigner',
variableAssignerDescription:
'This component performs operations on Data objects, including extracting, filtering, and editing keys and values in the Data.',
@ -1808,6 +1810,19 @@ Important structured information may include: names, dates, locations, events, k
removeKeys: 'Remove keys',
renameKeys: 'Rename keys',
},
ListOperationsOptions: {
topN: 'Top N',
head: 'Head',
tail: 'Tail',
sort: 'Sort',
filter: 'Filter',
dropDuplicates: 'Drop duplicates',
},
sortMethod: 'Sort method',
SortMethodOptions: {
asc: 'Ascending',
desc: 'Descending',
},
},
llmTools: {
bad_calculator: {

View file

@ -1510,6 +1510,8 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
codeDescription: '它允许开发人员编写自定义 Python 逻辑。',
dataOperations: '数据操作',
dataOperationsDescription: '对数据对象执行各种操作。',
listOperations: '列表操作',
listOperationsDescription: '对列表对象执行各种操作。',
variableAssigner: '变量赋值器',
variableAssignerDescription:
'此组件对数据对象执行操作,包括提取、筛选和编辑数据中的键和值。',
@ -1681,6 +1683,19 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
removeKeys: '删除键',
renameKeys: '重命名键',
},
ListOperationsOptions: {
topN: '取前N项',
head: '取前第N项',
tail: '取后第N项',
sort: '排序',
filter: '筛选',
dropDuplicates: '去重',
},
sortMethod: '排序方式',
SortMethodOptions: {
asc: '升序',
desc: '降序',
},
},
footer: {
profile: 'All rights reserved @ React',

View file

@ -61,6 +61,7 @@ import { FileNode } from './node/file-node';
import { InvokeNode } from './node/invoke-node';
import { IterationNode, IterationStartNode } from './node/iteration-node';
import { KeywordNode } from './node/keyword-node';
import { ListOperationsNode } from './node/list-operations-node';
import { MessageNode } from './node/message-node';
import NoteNode from './node/note-node';
import ParserNode from './node/parser-node';
@ -101,6 +102,7 @@ export const nodeTypes: NodeTypes = {
splitterNode: SplitterNode,
contextNode: ExtractorNode,
dataOperationsNode: DataOperationsNode,
listOperationsNode: ListOperationsNode,
variableAssignerNode: VariableAssignerNode,
variableAggregatorNode: VariableAggregatorNode,
};

View file

@ -79,6 +79,7 @@ export function AccordionOperators({
Operator.Code,
Operator.StringTransform,
Operator.DataOperations,
Operator.ListOperations,
// Operator.VariableAssigner,
Operator.VariableAggregator,
]}

View file

@ -0,0 +1,22 @@
import { BaseNode } from '@/interfaces/database/agent';
import { NodeProps } from '@xyflow/react';
import { camelCase } from 'lodash';
import { useTranslation } from 'react-i18next';
import { RagNode } from '.';
import { ListOperationsFormSchemaType } from '../../form/list-operations-form';
import { LabelCard } from './card';
export function ListOperationsNode({
...props
}: NodeProps<BaseNode<ListOperationsFormSchemaType>>) {
const { data } = props;
const { t } = useTranslation();
return (
<RagNode {...props}>
<LabelCard>
{t(`flow.ListOperationsOptions.${camelCase(data.form?.operations)}`)}
</LabelCard>
</RagNode>
);
}

View file

@ -596,6 +596,35 @@ export const initialDataOperationsValues = {
},
},
};
export enum SortMethod {
Asc = 'asc',
Desc = 'desc',
}
export enum ListOperations {
TopN = 'topN',
Head = 'head',
Tail = 'tail',
Filter = 'filter',
Sort = 'sort',
DropDuplicates = 'drop_duplicates',
}
export const initialListOperationsValues = {
query: '',
operations: ListOperations.TopN,
outputs: {
result: {
type: 'Array<?>',
},
first: {
type: '?',
},
last: {
type: '?',
},
},
};
export const initialVariableAssignerValues = {};
@ -674,6 +703,7 @@ export const RestrictedUpstreamMap = {
[Operator.Tool]: [Operator.Begin],
[Operator.Placeholder]: [Operator.Begin],
[Operator.DataOperations]: [Operator.Begin],
[Operator.ListOperations]: [Operator.Begin],
[Operator.Parser]: [Operator.Begin], // pipeline
[Operator.Splitter]: [Operator.Begin],
[Operator.HierarchicalMerger]: [Operator.Begin],
@ -730,6 +760,7 @@ export const NodeMap = {
[Operator.HierarchicalMerger]: 'splitterNode',
[Operator.Extractor]: 'contextNode',
[Operator.DataOperations]: 'dataOperationsNode',
[Operator.ListOperations]: 'listOperationsNode',
[Operator.VariableAssigner]: 'variableAssignerNode',
[Operator.VariableAggregator]: 'variableAggregatorNode',
};

View file

@ -21,6 +21,7 @@ import IterationForm from '../form/iteration-form';
import IterationStartForm from '../form/iteration-start-from';
import Jin10Form from '../form/jin10-form';
import KeywordExtractForm from '../form/keyword-extract-form';
import ListOperationsForm from '../form/list-operations-form';
import MessageForm from '../form/message-form';
import ParserForm from '../form/parser-form';
import PubMedForm from '../form/pubmed-form';
@ -184,6 +185,9 @@ export const FormConfigMap = {
[Operator.DataOperations]: {
component: DataOperationsForm,
},
[Operator.ListOperations]: {
component: ListOperationsForm,
},
[Operator.VariableAssigner]: {
component: VariableAssignerForm,
},

View file

@ -0,0 +1,140 @@
import NumberInput from '@/components/originui/number-input';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import {
Form,
FormControl,
FormField,
FormItem,
FormLabel,
FormMessage,
} from '@/components/ui/form';
import { Separator } from '@/components/ui/separator';
import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options';
import { buildOptions } from '@/utils/form';
import { zodResolver } from '@hookform/resolvers/zod';
import { memo } from 'react';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import {
DataOperationsOperatorOptions,
JsonSchemaDataType,
ListOperations,
SortMethod,
initialListOperationsValues,
} from '../../constant';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
import { INextOperatorForm } from '../../interface';
import { buildOutputList } from '../../utils/build-output-list';
import { FormWrapper } from '../components/form-wrapper';
import { Output, OutputSchema } from '../components/output';
import { PromptEditor } from '../components/prompt-editor';
import { QueryVariable } from '../components/query-variable';
export const RetrievalPartialSchema = {
query: z.string(),
operations: z.string(),
n: z.number().int().min(0).optional(),
sort_method: z.string().optional(),
filter: z
.object({
value: z.string().optional(),
operator: z.string().optional(),
})
.optional(),
...OutputSchema,
};
export const FormSchema = z.object(RetrievalPartialSchema);
export type ListOperationsFormSchemaType = z.infer<typeof FormSchema>;
const outputList = buildOutputList(initialListOperationsValues.outputs);
function ListOperationsForm({ node }: INextOperatorForm) {
const { t } = useTranslation();
const defaultValues = useFormValues(initialListOperationsValues, node);
const form = useForm<ListOperationsFormSchemaType>({
defaultValues: defaultValues,
mode: 'onChange',
resolver: zodResolver(FormSchema),
shouldUnregister: true,
});
const operations = useWatch({ control: form.control, name: 'operations' });
const ListOperationsOptions = buildOptions(
ListOperations,
t,
`flow.ListOperationsOptions`,
true,
);
const SortMethodOptions = buildOptions(
SortMethod,
t,
`flow.SortMethodOptions`,
true,
);
const operatorOptions = useBuildSwitchOperatorOptions(
DataOperationsOperatorOptions,
);
useWatchFormChange(node?.id, form, true);
return (
<Form {...form}>
<FormWrapper>
<QueryVariable
name="query"
className="flex-1"
types={[JsonSchemaDataType.Array]}
></QueryVariable>
<Separator />
<RAGFlowFormItem name="operations" label={t('flow.operations')}>
<SelectWithSearch options={ListOperationsOptions} />
</RAGFlowFormItem>
{[
ListOperations.TopN,
ListOperations.Head,
ListOperations.Tail,
].includes(operations as ListOperations) && (
<FormField
control={form.control}
name="n"
render={({ field }) => (
<FormItem>
<FormLabel>{t('flowNum')}</FormLabel>
<FormControl>
<NumberInput {...field} className="w-full"></NumberInput>
</FormControl>
<FormMessage />
</FormItem>
)}
/>
)}
{[ListOperations.Sort].includes(operations as ListOperations) && (
<RAGFlowFormItem name="sort_method" label={t('flow.sortMethod')}>
<SelectWithSearch options={SortMethodOptions} />
</RAGFlowFormItem>
)}
{[ListOperations.Filter].includes(operations as ListOperations) && (
<div className="flex items-center gap-2">
<RAGFlowFormItem name="filter.operator" className="flex-1">
<SelectWithSearch options={operatorOptions}></SelectWithSearch>
</RAGFlowFormItem>
<Separator className="w-2" />
<RAGFlowFormItem name="filter.value" className="flex-1">
<PromptEditor showToolbar={false} multiLine={false} />
</RAGFlowFormItem>
</div>
)}
<Output list={outputList} isFormRequired></Output>
</FormWrapper>
</Form>
);
}
export default memo(ListOperationsForm);

View file

@ -31,6 +31,7 @@ import {
initialIterationValues,
initialJin10Values,
initialKeywordExtractValues,
initialListOperationsValues,
initialMessageValues,
initialNoteValues,
initialParserValues,
@ -129,6 +130,7 @@ export const useInitializeOperatorParams = () => {
prompts: t('flow.prompts.user.summary'),
},
[Operator.DataOperations]: initialDataOperationsValues,
[Operator.ListOperations]: initialListOperationsValues,
[Operator.VariableAssigner]: initialVariableAssignerValues,
[Operator.VariableAggregator]: initialVariableAggregatorValues,
};

View file

@ -14,7 +14,7 @@ import { ReactComponent as YahooFinanceIcon } from '@/assets/svg/yahoo-finance.s
import { IconFont } from '@/components/icon-font';
import { cn } from '@/lib/utils';
import { Equal, FileCode, HousePlus, Variable } from 'lucide-react';
import { Columns3, Equal, FileCode, HousePlus, Variable } from 'lucide-react';
import { Operator } from './constant';
interface IProps {
@ -57,6 +57,7 @@ export const SVGIconMap = {
};
export const LucideIconMap = {
[Operator.DataOperations]: FileCode,
[Operator.ListOperations]: Columns3,
[Operator.VariableAssigner]: Equal,
[Operator.VariableAggregator]: Variable,
};

View file

@ -328,7 +328,6 @@ export const buildDslComponentsByGraph = (
case Operator.DataOperations:
params = transformDataOperationsParams(params);
break;
default:
break;
}