This commit is contained in:
YngvarHuang 2025-12-02 11:42:08 +08:00 committed by GitHub
commit 424482c3c1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 2722 additions and 11 deletions

View file

@ -627,9 +627,9 @@ class DocumentService(CommonService):
def begin2parse(cls, doc_id, keep_progress=False):
info = {
"progress_msg": "Task is queued...",
"process_begin_at": get_format_time(),
}
if not keep_progress:
info["process_begin_at"] = get_format_time()
info["progress"] = random.random() * 1 / 100.
info["run"] = TaskStatus.RUNNING.value
# keep the doc in DONE state when keep_progress=True for GraphRAG, RAPTOR and Mindmap tasks
@ -720,10 +720,12 @@ class DocumentService(CommonService):
freeze_progress = special_task_running and doc_progress >= 1 and not finished
msg = "\n".join(sorted(msg))
info = {
"process_duration": datetime.timestamp(
datetime.now()) -
d["process_begin_at"].timestamp(),
"run": status}
"run": status
}
if not freeze_progress and 0 < doc_progress < 1:
info["process_duration"] = (
datetime.timestamp(datetime.now()) - d["process_begin_at"].timestamp()
)
if prg != 0 and not freeze_progress:
info["progress"] = prg
if msg:

View file

@ -151,6 +151,7 @@ dependencies = [
"pip>=25.2",
"moodlepy>=0.23.0",
"pypandoc>=1.16",
"croniter>=2.0.1,<3.0.0",
"pyobvector==0.2.18",
"exceptiongroup>=1.3.0,<2.0.0",
"ffmpeg-python>=0.2.0",

201
pyproject.toml.orig Normal file
View file

@ -0,0 +1,201 @@
[project]
name = "ragflow"
version = "0.22.1"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.10,<3.13"
dependencies = [
"datrie>=0.8.3,<0.9.0",
"akshare>=1.15.78,<2.0.0",
"azure-storage-blob==12.22.0",
"azure-identity==1.17.1",
"azure-storage-file-datalake==12.16.0",
"anthropic==0.34.1",
"arxiv==2.1.3",
"aspose-slides>=25.10.0,<26.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
"atlassian-python-api==4.0.7",
"beartype>=0.18.5,<0.19.0",
"bio==1.7.1",
"blinker==1.7.0",
"boto3==1.34.140",
"botocore==1.34.140",
"cachetools==5.3.3",
"chardet==5.2.0",
"cn2an==0.5.22",
"cohere==5.6.2",
"Crawl4AI>=0.3.8",
"dashscope==1.20.11",
"deepl==1.18.0",
"demjson3==3.0.6",
"discord-py==2.3.2",
"dropbox==12.0.2",
"duckduckgo-search>=7.2.0,<8.0.0",
"editdistance==0.8.1",
"elastic-transport==8.12.0",
"elasticsearch==8.12.1",
"elasticsearch-dsl==8.12.0",
"extract-msg>=0.39.0",
"filelock==3.15.4",
"flask==3.0.3",
"flask-cors==5.0.0",
"flask-login==0.6.3",
"flask-session==0.8.0",
"google-search-results==2.4.2",
"google-auth-oauthlib>=1.2.0,<2.0.0",
"groq==0.9.0",
"hanziconv==0.3.2",
"html-text==0.6.2",
"httpx[socks]>=0.28.1,<0.29.0",
"huggingface-hub>=0.25.0,<0.26.0",
"infinity-sdk==0.6.5",
"infinity-emb>=0.0.66,<0.0.67",
"itsdangerous==2.1.2",
"json-repair==0.35.0",
"jira==3.10.5",
"markdown==3.6",
"markdown-to-json==2.1.1",
"minio==7.2.4",
"mistralai==0.4.2",
"mypy-boto3-s3==1.40.26",
"nltk==3.9.1",
"numpy>=1.26.0,<2.0.0",
"Office365-REST-Python-Client==2.6.2",
"ollama>=0.5.0",
"onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
"onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
"openai>=1.45.0",
"opencv-python==4.10.0.84",
"opencv-python-headless==4.10.0.84",
"openpyxl>=3.1.0,<4.0.0",
"opendal>=0.45.0,<0.46.0",
"ormsgpack==1.5.0",
"pandas>=2.2.0,<3.0.0",
"pdfplumber==0.10.4",
"peewee==3.17.1",
"pillow==10.4.0",
"protobuf==5.27.2",
"psycopg2-binary==2.9.9",
"pyclipper==1.3.0.post5",
"pycryptodomex==3.20.0",
"pymysql>=1.1.1,<2.0.0",
"pypdf==6.0.0",
"python-dotenv==1.0.1",
"python-dateutil==2.8.2",
"python-pptx>=1.0.2,<2.0.0",
"pywencai==0.12.2",
"qianfan==0.4.6",
"quart-auth==0.11.0",
"quart-cors==0.8.0",
"Quart==0.20.0",
"ranx==0.3.20",
"readability-lxml==0.8.1",
"valkey==6.0.2",
"requests==2.32.2",
"replicate==0.31.0",
"roman-numbers==1.0.2",
"ruamel-base==1.0.0",
"ruamel-yaml>=0.18.6,<0.19.0",
"scholarly==1.7.11",
"scikit-learn==1.5.0",
"selenium==4.22.0",
"selenium-wire==5.1.0",
"setuptools>=75.2.0,<76.0.0",
"shapely==2.0.5",
"six==1.16.0",
"slack-sdk==3.37.0",
"strenum==0.4.15",
"tabulate==0.9.0",
"tavily-python==0.5.1",
"tencentcloud-sdk-python==3.0.1478",
"tika==2.6.0",
"tiktoken==0.7.0",
"umap_learn==0.5.6",
"vertexai==1.70.0",
"google-genai>=1.41.0,<2.0.0",
"volcengine==1.0.194",
"voyageai==0.2.3",
"webdriver-manager==4.0.1",
"werkzeug==3.0.6",
"wikipedia==1.4.0",
"word2number==1.1",
"xgboost==1.6.0",
"xpinyin==0.7.6",
"yfinance==0.2.65",
"zhipuai==2.0.1",
"google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
"python-docx>=1.1.2,<2.0.0",
"pypdf2>=3.0.1,<4.0.0",
"graspologic>=3.4.1,<4.0.0",
"mini-racer>=0.12.4,<0.13.0",
"pyodbc>=5.2.0,<6.0.0",
"pyicu>=2.15.3,<3.0.0",
"flasgger>=0.9.7.1,<0.10.0",
"xxhash>=3.5.0,<4.0.0",
"trio>=0.29.0",
"langfuse>=2.60.0",
"debugpy>=1.8.13",
"mcp>=1.9.4",
"opensearch-py==2.7.1",
"pluginlib==0.9.4",
"click>=8.1.8",
"python-calamine>=0.4.0",
"litellm>=1.74.15.post1",
"flask-mail>=0.10.0",
"lark>=1.2.2",
"mammoth>=1.11.0",
"markdownify>=1.2.0",
"captcha>=0.7.1",
"pip>=25.2",
"pypandoc>=1.16",
"croniter>=2.0.1,<3.0.0",
"pyobvector==0.2.18",
]
[dependency-groups]
test = [
"hypothesis>=6.132.0",
"openpyxl>=3.1.5",
"pillow>=10.4.0",
"pytest>=8.3.5",
"python-docx>=1.1.2",
"python-pptx>=1.0.2",
"reportlab>=4.4.1",
"requests>=2.32.2",
"requests-toolbelt>=1.0.0",
]
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
[tool.setuptools]
packages = [
'agent',
'agentic_reasoning',
'api',
'deepdoc',
'graphrag',
'intergrations.chatgpt-on-wechat.plugins',
'mcp.server',
'rag',
'sdk.python.ragflow_sdk',
]
[tool.ruff]
line-length = 200
exclude = [".venv", "rag/svr/discord_svr.py"]
[tool.ruff.lint]
extend-select = ["ASYNC", "ASYNC1"]
ignore = ["E402"]
[tool.pytest.ini_options]
markers = [
"p1: high priority test cases",
"p2: medium priority test cases",
"p3: low priority test cases",
]

View file

@ -51,7 +51,7 @@ import faulthandler
import numpy as np
from peewee import DoesNotExist
from common.constants import LLMType, ParserType, PipelineTaskType
from api.db.services.document_service import DocumentService
from api.db.services.document_service import DocumentService, queue_raptor_o_graphrag_tasks
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import TaskService, has_canceled, CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID
from api.db.services.file2document_service import File2DocumentService
@ -68,6 +68,7 @@ from common.signal_utils import start_tracemalloc_and_snapshot, stop_tracemalloc
from common.exceptions import TaskCanceledException
from common import settings
from common.constants import PAGERANK_FLD, TAG_FLD, SVR_CONSUMER_GROUP_NAME
from croniter import croniter
BATCH_SIZE = 64
@ -638,6 +639,8 @@ async def run_dataflow(task: dict):
logging.info("[Done], chunks({}), token({}), elapsed:{:.2f}".format(len(chunks), embedding_token_consumption, task_time_cost))
PipelineOperationLogService.create(document_id=doc_id, pipeline_id=dataflow_id, task_type=PipelineTaskType.PARSE, dsl=str(pipeline))
trigger_update_after(task_dataset_id, doc_id)
@timeout(3600)
async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_size, callback=None, doc_ids=[]):
@ -772,6 +775,27 @@ async def insert_es(task_id, task_tenant_id, task_dataset_id, chunks, progress_c
return True
def trigger_update_after(kb_id: str, doc_id: str):
    """Queue GraphRAG / RAPTOR tasks for a knowledge base using the ``update_after`` strategy.

    For each of the two task types, a task is queued only when the matching
    section of the knowledge base's ``parser_config`` has its ``use_*`` flag set
    and ``strategy == "update_after"``. The id of the queued task is then stored
    on the knowledge base (``graphrag_task_id`` / ``raptor_task_id``).

    This is a best-effort hook: it never raises. Failures are logged instead of
    being silently discarded so that a broken trigger is visible in the logs.

    Args:
        kb_id: Id of the knowledge base the document belongs to.
        doc_id: Id of the document that triggered the update; it is the only
            entry passed as ``doc_ids`` to the queued task.
    """
    try:
        ok, kb = KnowledgebaseService.get_by_id(kb_id)
        if not ok:
            return
        conf = kb.parser_config or {}
        # (task type == parser_config section, enable flag, KB field holding the queued task id)
        targets = (
            ("graphrag", "use_graphrag", "graphrag_task_id"),
            ("raptor", "use_raptor", "raptor_task_id"),
        )
        for ty, enable_flag, task_field in targets:
            section = conf.get(ty) or {}
            if not section.get(enable_flag) or section.get("strategy") != "update_after":
                continue
            docs, _ = DocumentService.get_by_kb_id(
                kb_id=kb.id,
                page_number=0,
                items_per_page=0,
                orderby="create_time",
                desc=False,
                keywords="",
                run_status=[],
                types=[],
                suffix=[],
            )
            # Fall back to a minimal stand-in when the listing comes back empty.
            sample_document = docs[0] if docs else {"id": doc_id}
            tid = queue_raptor_o_graphrag_tasks(
                sample_doc_id=sample_document,
                ty=ty,
                priority=0,
                fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID,
                doc_ids=[doc_id],
            )
            KnowledgebaseService.update_by_id(kb.id, {task_field: tid})
    except Exception:
        # Deliberately non-fatal for the caller, but no longer silent.
        logging.exception("trigger_update_after failed for kb %s, doc %s", kb_id, doc_id)
@timeout(60*60*3, 1)
async def do_handle_task(task):
task_type = task.get("task_type", "")
@ -973,6 +997,7 @@ async def do_handle_task(task):
"Chunk doc({}), page({}-{}), chunks({}), token({}), elapsed:{:.2f}".format(task_document_name, task_from_page,
task_to_page, len(chunks),
token_count, task_time_cost))
trigger_update_after(task_dataset_id, task_doc_id)
async def handle_task():
@ -1087,6 +1112,90 @@ async def task_manager():
task_limiter.release()
async def _due(cron: str, last_finish: datetime):
    """Tell whether a cron-scheduled task should run now.

    The current time is truncated to the minute and the most recent cron
    occurrence before it is computed with ``croniter``. The task is due unless
    ``last_finish`` is set and is at or after that occurrence.

    Args:
        cron: Cron expression; empty or invalid expressions are never due.
        last_finish: When the task last finished, or a falsy value if it has
            never finished.

    Returns:
        True when the task is due, False otherwise (including on any error).
    """
    try:
        if not cron:
            return False
        if not croniter.is_valid(cron):
            return False
        slot = datetime.now().replace(second=0, microsecond=0)
        prev_time = croniter(cron, slot).get_prev(datetime)
        return not (last_finish and last_finish >= prev_time)
    except Exception:
        # Still treated as "not due", but leave a trace: a swallowed error here
        # would otherwise make a timed task silently never fire.
        logging.exception("Failed to evaluate cron schedule %r", cron)
        return False
def _doc_finish_ts_ms(doc):
    """Return ``process_begin_at`` plus ``process_duration`` of ``doc`` in epoch milliseconds, or None."""
    pb = doc.get("process_begin_at")
    if not pb:
        return None
    try:
        pb_ts_ms = int(pb.timestamp() * 1000)
    except Exception:
        return None
    return pb_ts_ms + int((doc.get("process_duration") or 0) * 1000)


def _schedule_if_needed(kb, changed_docs, ty):
    """Queue a ``ty`` task over ``changed_docs`` unless the KB's previous task of that type is still unfinished."""
    if not changed_docs:
        return
    task_field = "graphrag_task_id" if ty == "graphrag" else "raptor_task_id"
    task_id = getattr(kb, task_field)
    if task_id:
        ok, t = TaskService.get_by_id(task_id)
        # Progress -1 and 1 are the only values treated as "no longer running".
        if ok and t and t.progress not in [-1, 1]:
            return
    tid = queue_raptor_o_graphrag_tasks(
        sample_doc_id=changed_docs[0],
        ty=ty,
        priority=0,
        fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID,
        doc_ids=[d["id"] for d in changed_docs],
    )
    KnowledgebaseService.update_by_id(kb.id, {task_field: tid})


async def _run_timed_strategy(kb, ty):
    """Apply the ``timed`` strategy of one task type (``"graphrag"`` or ``"raptor"``) to ``kb``."""
    section = (kb.parser_config or {}).get(ty) or {}
    if not (section.get(f"use_{ty}") and section.get("strategy") == "timed" and section.get("cron")):
        return
    finish_dt = getattr(kb, f"{ty}_task_finish_at")
    if not await _due(section.get("cron"), finish_dt):
        return
    documents, _ = DocumentService.get_by_kb_id(
        kb_id=kb.id,
        page_number=0,
        items_per_page=0,
        orderby="create_time",
        desc=False,
        keywords="",
        run_status=[],
        types=[],
        suffix=[],
    )
    if not documents:
        return
    changed_docs = documents
    if finish_dt:
        # Only documents whose processing ended after the last finished task count as changed.
        finish_ts_ms = int(finish_dt.timestamp() * 1000)
        changed_docs = [
            d for d in documents
            if (ts := _doc_finish_ts_ms(d)) is not None and ts > finish_ts_ms
        ]
    _schedule_if_needed(kb, changed_docs, ty)


async def scheduler():
    """Periodically queue GraphRAG / RAPTOR tasks for knowledge bases using the ``timed`` strategy.

    Roughly once a minute, until ``stop_event`` is set, every knowledge base is
    inspected; for each task type whose cron schedule is due, a task is queued
    over the documents processed since that type's last finished task (or over
    all documents if it has never finished).

    Errors are logged and never propagate. A failure on one knowledge base does
    not prevent the remaining ones from being evaluated in the same pass.
    """
    while not stop_event.is_set():
        try:
            for kb_id in KnowledgebaseService.get_all_ids():
                try:
                    ok, kb = KnowledgebaseService.get_by_id(kb_id)
                    if not ok:
                        continue
                    for ty in ("graphrag", "raptor"):
                        await _run_timed_strategy(kb, ty)
                except Exception:
                    logging.exception("scheduler: failed to evaluate timed tasks for kb %s", kb_id)
        except Exception as e:
            logging.exception(e)
        await trio.sleep(60)  # Special tasks take a long time to run, so the start time of scheduled tasks does not need to be very precise
async def main():
logging.info(r"""
____ __ _
@ -1114,6 +1223,7 @@ async def main():
async with trio.open_nursery() as nursery:
nursery.start_soon(report_status)
nursery.start_soon(scheduler)
while not stop_event.is_set():
await task_limiter.acquire()
nursery.start_soon(task_manager)

View file

@ -69,6 +69,7 @@
"classnames": "^2.5.1",
"clsx": "^2.1.1",
"cmdk": "^1.0.4",
"cron-validate": "^1.4.5",
"dayjs": "^1.11.10",
"dompurify": "^3.1.6",
"eventsource-parser": "^1.1.2",

View file

@ -18,6 +18,8 @@ import {
FormLabel,
FormMessage,
} from '../ui/form';
import { ExpandedInput } from '../ui/input';
import { Radio } from '../ui/radio';
import { RAGFlowSelect } from '../ui/select';
import { Switch } from '../ui/switch';
@ -119,6 +121,10 @@ const GraphRagItems = ({
control: form.control,
name: 'parser_config.graphrag.use_graphrag',
});
const strategy = useWatch({
control: form.control,
name: 'parser_config.graphrag.strategy',
});
const methodOptions = useMemo(() => {
return [MethodValue.Light, MethodValue.General].map((x) => ({
@ -136,6 +142,60 @@ const GraphRagItems = ({
return (
<FormContainer className={cn({ 'mb-4': marginBottom }, className)}>
<FormField
control={form.control}
name={'parser_config.graphrag.strategy'}
render={({ field }) => {
return (
<FormItem className=" items-center space-y-0 ">
<div className="flex items-start">
<FormLabel className="text-sm whitespace-nowrap w-1/4">
{t('graphRagStrategy')}
</FormLabel>
<div className="w-3/4">
<FormControl>
<Radio.Group {...field}>
<div className={'flex gap-4 w-full text-text-secondary '}>
<Radio value="manual">{t('strategyManual')}</Radio>
<Radio value="update_after">{t('strategyUpdateAfter')}</Radio>
<Radio value="timed">{t('strategyTimed')}</Radio>
</div>
</Radio.Group>
</FormControl>
</div>
</div>
<div className="flex pt-1">
<div className="w-1/4"></div>
<FormMessage />
</div>
</FormItem>
);
}}
/>
{strategy === 'timed' && (
<FormField
control={form.control}
name={'parser_config.graphrag.cron'}
render={({ field }) => (
<FormItem className=" items-center space-y-0 ">
<div className="flex items-center">
<FormLabel className="text-sm whitespace-nowrap w-1/4">
{t('cronExpression')}
</FormLabel>
<div className="w-3/4">
<FormControl>
<ExpandedInput {...field} className="w-full" placeholder={t('cronPlaceholder')} />
</FormControl>
</div>
</div>
<div className="flex pt-1">
<div className="w-1/4"></div>
<FormMessage />
</div>
</FormItem>
)}
/>
)}
<UseGraphRagFormField
data={data}
onDelete={onDelete}

View file

@ -67,6 +67,10 @@ const RaptorFormFields = ({
const form = useFormContext();
const { t } = useTranslate('knowledgeConfiguration');
const useRaptor = useWatch({ name: UseRaptorField });
const strategy = useWatch({
control: form.control,
name: 'parser_config.raptor.strategy',
});
const handleGenerate = useCallback(() => {
form.setValue(RandomSeedField, random(10000));
@ -74,6 +78,60 @@ const RaptorFormFields = ({
return (
<>
<FormField
control={form.control}
name={'parser_config.raptor.strategy'}
render={({ field }) => {
return (
<FormItem className=" items-center space-y-0 ">
<div className="flex items-start">
<FormLabel className="text-sm whitespace-nowrap w-1/4">
{t('raptorStrategy')}
</FormLabel>
<div className="w-3/4">
<FormControl>
<Radio.Group {...field}>
<div className={'flex gap-4 w-full text-text-secondary '}>
<Radio value="manual">{t('strategyManual')}</Radio>
<Radio value="update_after">{t('strategyUpdateAfter')}</Radio>
<Radio value="timed">{t('strategyTimed')}</Radio>
</div>
</Radio.Group>
</FormControl>
</div>
</div>
<div className="flex pt-1">
<div className="w-1/4"></div>
<FormMessage />
</div>
</FormItem>
);
}}
/>
{strategy === 'timed' && (
<FormField
control={form.control}
name={'parser_config.raptor.cron'}
render={({ field }) => (
<FormItem className=" items-center space-y-0 ">
<div className="flex items-center">
<FormLabel className="text-sm whitespace-nowrap w-1/4">
{t('cronExpression')}
</FormLabel>
<div className="w-3/4">
<FormControl>
<ExpandedInput {...field} className="w-full" placeholder={t('cronPlaceholder')} />
</FormControl>
</div>
</div>
<div className="flex pt-1">
<div className="w-1/4"></div>
<FormMessage />
</div>
</FormItem>
)}
/>
)}
<FormField
control={form.control}
name={UseRaptorField}

View file

@ -52,6 +52,8 @@ export interface IKnowledgeResult {
// RAPTOR section of a dataset's parser configuration (see ParserConfig.raptor).
export interface Raptor {
// Flag the backend checks before queuing any RAPTOR task for the dataset.
use_raptor: boolean;
// Generation strategy; the settings form offers 'manual', 'update_after' and 'timed'.
strategy?: string;
// Cron expression for the schedule; the form validates it only when strategy is 'timed'.
cron?: string;
}
export interface ParserConfig {
@ -66,7 +68,7 @@ export interface ParserConfig {
raptor?: Raptor;
tag_kb_ids?: string[];
topn_tags?: number;
graphrag?: { use_graphrag?: boolean };
graphrag?: { use_graphrag?: boolean; entity_types?: string[]; method?: string; resolution?: boolean; community?: boolean; strategy?: string; cron?: string };
}
export interface IKnowledgeFileParserConfig {

View file

@ -208,6 +208,15 @@ export default {
plainText: 'Einfach',
},
knowledgeConfiguration: {
useGraphRag: 'Wissensgraph-Generierung',
useRaptor: 'RAPTOR zur Verbesserung des Abrufs verwenden',
raptorStrategy: 'RAPTOR-Generierungsstrategie',
graphRagStrategy: 'GraphRAG-Generierungsstrategie',
strategyManual: 'Manuell',
strategyUpdateAfter: 'Nach Aktualisierung',
strategyTimed: 'Zeitgesteuert',
cronExpression: 'Cron-Ausdruck',
cronPlaceholder: 'Bitte Cron-Ausdruck eingeben',
titleDescription:
'Aktualisieren Sie hier Ihre Wissensdatenbank-Konfiguration, insbesondere die Chunk-Methode.',
name: 'Name der Wissensdatenbank',
@ -334,6 +343,7 @@ export default {
<i>Textzeilen, die nicht den obigen Regeln entsprechen, werden ignoriert.</i>
`,
useRaptor: 'RAPTOR zur Verbesserung des Abrufs verwenden',
useRaptorTip:
'RAPTOR für Multi-Hop-Frage-Antwort-Aufgaben aktivieren. Details unter https://ragflow.io/docs/dev/enable_raptor.',
prompt: 'Prompt',

View file

@ -302,8 +302,17 @@ export default {
<br/>
Do you want to continue?
`,
useGraphRag: 'Knowledge Graph Generation',
useRaptor: 'RAPTOR Generation',
raptorStrategy: 'RAPTOR Generation Strategy',
extractRaptor: 'Extract Raptor',
extractKnowledgeGraph: 'Extract Knowledge Graph',
graphRagStrategy: 'GraphRAG Generation Strategy',
strategyManual: 'Manual',
strategyUpdateAfter: 'Update after',
strategyTimed: 'Timed',
cronExpression: 'Cron expression',
cronPlaceholder: 'Please input cron expression',
filterPlaceholder: 'please input filter',
fileFilterTip: '',
fileFilter: 'File Filter',

View file

@ -158,6 +158,17 @@ export default {
html4excel: 'Excel a HTML',
html4excelTip: `Usar junto con el método de fragmentación General. Cuando está desactivado, los archivos de hoja de cálculo (XLSX, XLS (Excel 97-2003)) se analizan línea por línea como pares clave-valor. Cuando está activado, los archivos de hoja de cálculo se convierten en tablas HTML. Si la tabla original tiene más de 12 filas, el sistema la dividirá automáticamente en varias tablas HTML cada 12 filas. Para más información, consulte https://ragflow.io/docs/dev/enable_excel2html.`,
},
knowledgeConfiguration: {
useGraphRag: 'Generación de grafo de conocimiento',
useRaptor: 'Generación de RAPTOR',
raptorStrategy: 'Estrategia de generación de RAPTOR',
graphRagStrategy: 'Estrategia de generación de GraphRAG',
strategyManual: 'Manual',
strategyUpdateAfter: 'Tras actualización',
strategyTimed: 'Programado',
cronExpression: 'Expresión cron',
cronPlaceholder: 'Introduzca la expresión cron',
},
// Otros bloques de traducción
// Continua con la misma estructura

View file

@ -194,6 +194,15 @@ export default {
'Le modèle de réordonnancement est très consommateur de temps.',
},
knowledgeConfiguration: {
useGraphRag: 'Génération du graphe de connaissances',
useRaptor: 'Utiliser RAPTOR pour améliorer la récupération',
raptorStrategy: 'Stratégie de génération RAPTOR',
graphRagStrategy: 'Stratégie de génération GraphRAG',
strategyManual: 'Manuel',
strategyUpdateAfter: 'Après mise à jour',
strategyTimed: 'Planifié',
cronExpression: 'Expression cron',
cronPlaceholder: 'Veuillez saisir une expression cron',
titleDescription:
'Modifiez ici la configuration de votre base de connaissances, notamment la méthode de découpage.',
name: 'Nom de la base de connaissances',
@ -234,6 +243,7 @@ export default {
'Affichera une explication visuelle des catégories de base de connaissances',
// Les contenus HTML comme "book", "laws", etc. sont laissés en l'état pour ne pas altérer leur structure technique.
useRaptor: 'Utiliser RAPTOR pour améliorer la récupération',
useRaptorTip:
'Activez RAPTOR pour les questions nécessitant plusieurs étapes. Voir https://ragflow.io/docs/dev/enable_raptor pour plus d’informations.',
prompt: 'Prompt',

View file

@ -163,6 +163,13 @@ export default {
html4excelTip: `Gunakan bersama dengan metode pemotongan General. Ketika dinonaktifkan, file spreadsheet (XLSX, XLS (Excel 97-2003)) akan dianalisis baris demi baris menjadi pasangan kunci-nilai. Ketika diaktifkan, file spreadsheet akan dianalisis menjadi tabel HTML. Jika tabel asli memiliki lebih dari 12 baris, sistem akan secara otomatis membagi menjadi beberapa tabel HTML setiap 12 baris. Untuk informasi lebih lanjut, lihat https://ragflow.io/docs/dev/enable_excel2html.`,
},
knowledgeConfiguration: {
useGraphRag: 'Generasi grafik pengetahuan',
graphRagStrategy: 'Strategi generasi GraphRAG',
strategyManual: 'Manual',
strategyUpdateAfter: 'Setelah pembaruan',
strategyTimed: 'Terjadwal',
cronExpression: 'Ekspresi cron',
cronPlaceholder: 'Masukkan ekspresi cron',
titleDescription:
'Perbarui detail basis pengetahuan Anda terutama metode parsing di sini.',
name: 'Nama basis pengetahuan',
@ -290,6 +297,7 @@ export default {
Perhatikan jenis entitas yang perlu Anda tentukan.</p>`,
useRaptor: 'Gunakan RAPTOR untuk meningkatkan pengambilan',
useRaptorTip:
'Aktifkan RAPTOR untuk tugas tanya jawab multi-langkah. Lihat https://ragflow.io/docs/dev/enable_raptor untuk informasi lebih lanjut.',
prompt: 'Prompt',

View file

@ -171,6 +171,13 @@ export default {
autoQuestionsTip: `ランキングスコアを向上させるために、「システムモデル設定」で定義されたチャットモデルを使用して、ナレッジベースのチャンクごとにN個の質問を抽出します。 これにより、追加のトークンが消費されることに注意してください。 結果はチャンクリストで表示および編集できます。 質問抽出エラーはチャンク処理をブロックしません。空の結果が元のチャンクに追加されます。詳細は https://ragflow.io/docs/dev/autokeyword_autoquestion をご覧ください。`,
},
knowledgeConfiguration: {
useGraphRag: 'ナレッジグラフ生成',
graphRagStrategy: 'GraphRAG 生成戦略',
strategyManual: '手動',
strategyUpdateAfter: '更新後',
strategyTimed: '定期',
cronExpression: 'Cron 式',
cronPlaceholder: 'Cron 式を入力してください',
titleDescription:
'ナレッジベースの設定、特にチャンク方法をここで更新してください。',
name: 'ナレッジベース名',
@ -287,6 +294,7 @@ export default {
<p>LLMに入力され</p>
<p><b></b></p>`,
useRaptor: 'RAPTORを使用して検索を強化',
raptorStrategy: 'RAPTOR 生成戦略',
useRaptorTip:
'マルチホップ質問応答タスクでRAPTORを有効にしてください。詳細は https://ragflow.io/docs/dev/enable_raptor をご覧ください。',
prompt: 'プロンプト',

View file

@ -201,6 +201,13 @@ export default {
metaData: 'Metadados',
},
knowledgeConfiguration: {
useGraphRag: 'Geração de grafo de conhecimento',
graphRagStrategy: 'Estratégia de geração GraphRAG',
strategyManual: 'Manual',
strategyUpdateAfter: 'Após atualização',
strategyTimed: 'Agendado',
cronExpression: 'Expressão cron',
cronPlaceholder: 'Insira a expressão cron',
titleDescription:
'Atualize a configuração da sua base de conhecimento aqui, especialmente o método de fragmentação.',
name: 'Nome da base de conhecimento',
@ -262,6 +269,7 @@ export default {
<li>Se o arquivo estiver no formato <b>CSV/TXT</b>, ele deve estar codificado em UTF-8 e usar TAB como delimitador para separar perguntas e respostas.</li>
<p><i>Linhas de texto que não seguirem essas regras serão ignoradas, e cada par de Pergunta & Resposta será tratado como um fragmento distinto.</i></p>`,
useRaptor: 'Usar RAPTOR para melhorar a recuperação',
raptorStrategy: 'Estratégia de geração RAPTOR',
useRaptorTip:
'Ative o RAPTOR para tarefas de perguntas e respostas multi-hop. Veja https://ragflow.io/docs/dev/enable_raptor para mais detalhes.',
prompt: 'Prompt',

View file

@ -277,6 +277,15 @@ export default {
reRankModelWaring: 'Re-rank модель очень требовательна ко времени.',
},
knowledgeConfiguration: {
useGraphRag: 'Генерация графа знаний',
useRaptor: 'Генерация RAPTOR',
raptorStrategy: 'Стратегия генерации RAPTOR',
graphRagStrategy: 'Стратегия генерации GraphRAG',
strategyManual: 'Вручную',
strategyUpdateAfter: 'После обновления',
strategyTimed: 'По расписанию',
cronExpression: 'Cron-выражение',
cronPlaceholder: 'Введите cron-выражение',
generationScopeTip:
'Определяет, генерируется ли RAPTOR для всего набора данных или для одного файла.',
scopeDataset: 'Датасет',

2139
web/src/locales/ru.ts.orig Normal file

File diff suppressed because it is too large Load diff

View file

@ -182,6 +182,13 @@ export default {
documentMetaTips: `<p>Dữ liệu meta ở định dạng Json (không thể tìm kiếm). Nó sẽ được thêm vào prompt cho LLM nếu bất kỳ đoạn nào của tài liệu này được đưa vào prompt.</p> <p>Ví dụ:</p> <b>Dữ liệu meta là:</b><br> <code> { "Author": "Alex Dowson", "Date": "2024-11-12" } </code><br><b>Prompt sẽ là:</b><br> <p>Tài liệu: the_name_of_document</p> <p>Tác giả: Alex Dowson</p> <p>Ngày: 2024-11-12</p> <p>Các đoạn liên quan như sau:</p> <ul> <li> Đây là nội dung của đoạn...</li> <li> Đây là nội dung của đoạn...</li> </ul>`,
},
knowledgeConfiguration: {
useGraphRag: 'Tạo đồ thị tri thức',
graphRagStrategy: 'Chiến lược tạo GraphRAG',
strategyManual: 'Thủ công',
strategyUpdateAfter: 'Sau khi cập nhật',
strategyTimed: 'Định kỳ',
cronExpression: 'Biểu thức cron',
cronPlaceholder: 'Nhập biểu thức cron',
titleDescription:
'Cập nhật cấu hình cơ sở kiến thức của bạn tại đây, đặc biệt là phương thức phân khối.',
name: 'Tên cơ sở kiến thức',
@ -295,6 +302,7 @@ export default {
<p>Các khối sau đó đưc đưa vào LLM đ trích xuất các thực thể mối quan hệ cho biểu đ tri thức đ duy.</p>
<p>Đm bảo bạn đã đt <b>Loại thực thể</b>.</p>`,
useRaptor: 'Sử dụng RAPTOR để cải thiện truy xuất',
raptorStrategy: 'Chiến lược tạo RAPTOR',
useRaptorTip:
'Kích hoạt RAPTOR cho các tác vụ hỏi đáp đa bước. Xem chi tiết tại https://ragflow.io/docs/dev/enable_raptor.',
prompt: 'Nhắc nhở',

View file

@ -318,7 +318,8 @@ export default {
<p></p>
<i></i>
`,
useRaptor: '使用 RAPTOR 文件增強策略',
useRaptor: '召回增強RAPTOR生成資訊',
raptorStrategy: '召回增強RAPTOR生成策略',
useRaptorTip:
'啟用 RAPTOR 以用於多跳問答任務。詳情請參見https://ragflow.io/docs/dev/enable_raptor',
prompt: '提示詞',
@ -364,7 +365,13 @@ export default {
`,
tags: '標籤',
addTag: '增加標籤',
useGraphRag: '提取知識圖譜',
useGraphRag: '知識圖譜生成資訊',
graphRagStrategy: '知識圖譜生成策略',
strategyManual: '手動',
strategyUpdateAfter: '更新後',
strategyTimed: '定時',
cronExpression: '定時生成 cron 表達式',
cronPlaceholder: '請輸入 cron 表達式',
useGraphRagTip:
'基於知識庫內所有切好的文本塊構建知識圖譜,用以提升多跳和複雜問題回答的正確率。請注意:構建知識圖譜將消耗大量 token 和時間。詳見 https://ragflow.io/docs/dev/construct_knowledge_graph。',
graphRagMethod: '方法',

View file

@ -439,7 +439,8 @@ export default {
<p>使</p>
<i></i>
`,
useRaptor: '使用召回增强 RAPTOR 策略',
useRaptor: '召回增强RAPTOR生成信息',
raptorStrategy: '召回增强RAPTOR生成策略',
useRaptorTip:
'RAPTOR 常应用于复杂的多跳问答任务。如需打开,请跳转至知识库的文件页面,点击生成 > RAPTOR 开启。详见: https://ragflow.io/docs/dev/enable_raptor。',
prompt: '提示词',
@ -485,7 +486,13 @@ export default {
`,
tags: '标签',
addTag: '增加标签',
useGraphRag: '提取知识图谱',
useGraphRag: '知识图谱生成信息',
graphRagStrategy: '知识图谱生成策略',
strategyManual: '手动',
strategyUpdateAfter: '更新后',
strategyTimed: '定时',
cronExpression: '定时生成 cron 表达式',
cronPlaceholder: '请输入 cron 表达式',
useGraphRagTip:
'基于知识库内所有切好的文本块构建知识图谱,用以提升多跳和复杂问题回答的正确率。请注意:构建知识图谱将消耗大量 token 和时间。详见 https://ragflow.io/docs/dev/construct_knowledge_graph。',
graphRagMethod: '方法',

View file

@ -1,5 +1,15 @@
import { t } from 'i18next';
import { z } from 'zod';
import cron from 'cron-validate';
// Returns true only for a non-empty cron expression accepted by cron-validate
// with the seconds and years fields switched off.
const isValidCron = (s?: string) => {
  // A missing or empty expression is never a valid schedule.
  if (!s) {
    return false;
  }
  const options = {
    preset: 'default',
    override: { useSeconds: false, useYears: false },
  };
  return cron(s, options).isValid();
};
export const formSchema = z
.object({
@ -38,6 +48,8 @@ export const formSchema = z
max_cluster: z.number().optional(),
random_seed: z.number().optional(),
scope: z.string().optional(),
strategy: z.string().optional(),
cron: z.string().optional(),
})
.refine(
(data) => {
@ -50,6 +62,18 @@ export const formSchema = z
message: 'Prompt is required',
path: ['prompt'],
},
)
.refine(
(data) => {
if (data.strategy === 'timed') {
return isValidCron(data.cron || '');
}
return true;
},
{
message: 'Invalid cron expression',
path: ['cron'],
},
),
graphrag: z
.object({
@ -58,6 +82,8 @@ export const formSchema = z
method: z.string().optional(),
resolution: z.boolean().optional(),
community: z.boolean().optional(),
strategy: z.string().optional(),
cron: z.string().optional(),
})
.refine(
(data) => {
@ -73,6 +99,18 @@ export const formSchema = z
message: 'Please enter Entity types',
path: ['entity_types'],
},
)
.refine(
(data) => {
if (data.strategy === 'timed') {
return isValidCron(data.cron || '');
}
return true;
},
{
message: 'Invalid cron expression',
path: ['cron'],
},
),
})
.optional(),

View file

@ -75,11 +75,15 @@ export default function DatasetSettings() {
random_seed: 0,
scope: 'file',
prompt: t('knowledgeConfiguration.promptText'),
strategy: 'manual',
cron: '',
},
graphrag: {
use_graphrag: true,
entity_types: initialEntityTypes,
method: MethodValue.Light,
strategy: 'manual',
cron: '',
},
},
pipeline_id: '',