Merge branch 'main' into feature/websocket-streaming-api

This commit is contained in:
aka James4u 2025-12-04 05:41:37 -08:00 committed by GitHub
commit 82d621c111
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
54 changed files with 1009 additions and 557006 deletions

View file

@ -10,7 +10,6 @@ WORKDIR /ragflow
# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
tar --exclude='.*' -cf - \
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
/huggingface.co/InfiniFlow/deepdoc \

View file

@ -91,9 +91,6 @@ class Graph:
def load(self):
self.components = self.dsl["components"]
cpn_nms = set([])
for k, cpn in self.components.items():
cpn_nms.add(cpn["obj"]["component_name"])
for k, cpn in self.components.items():
cpn_nms.add(cpn["obj"]["component_name"])
param = component_class(cpn["obj"]["component_name"] + "Param")()

View file

@ -18,7 +18,6 @@ import json
import logging
import os
import re
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from functools import partial
from typing import Any
@ -30,8 +29,8 @@ from api.db.services.llm_service import LLMBundle
from api.db.services.tenant_llm_service import TenantLLMService
from api.db.services.mcp_server_service import MCPServerService
from common.connection_utils import timeout
from rag.prompts.generator import next_step, COMPLETE_TASK, analyze_task, \
citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
from rag.prompts.generator import next_step_async, COMPLETE_TASK, analyze_task_async, \
citation_prompt, reflect_async, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
from common.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool
from agent.component.llm import LLMParam, LLM
@ -154,96 +153,19 @@ class Agent(LLM, ToolBase):
return None
def _force_format_to_schema(self, text: str, schema_prompt: str) -> str:
async def _force_format_to_schema_async(self, text: str, schema_prompt: str) -> str:
fmt_msgs = [
{"role": "system", "content": schema_prompt + "\nIMPORTANT: Output ONLY valid JSON. No markdown, no extra text."},
{"role": "user", "content": text},
]
_, fmt_msgs = message_fit_in(fmt_msgs, int(self.chat_mdl.max_length * 0.97))
return self._generate(fmt_msgs)
return await self._generate_async(fmt_msgs)
def _invoke(self, **kwargs):
return asyncio.run(self._invoke_async(**kwargs))
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60)))
def _invoke(self, **kwargs):
if self.check_if_canceled("Agent processing"):
return
if kwargs.get("user_prompt"):
usr_pmt = ""
if kwargs.get("reasoning"):
usr_pmt += "\nREASONING:\n{}\n".format(kwargs["reasoning"])
if kwargs.get("context"):
usr_pmt += "\nCONTEXT:\n{}\n".format(kwargs["context"])
if usr_pmt:
usr_pmt += "\nQUERY:\n{}\n".format(str(kwargs["user_prompt"]))
else:
usr_pmt = str(kwargs["user_prompt"])
self._param.prompts = [{"role": "user", "content": usr_pmt}]
if not self.tools:
if self.check_if_canceled("Agent processing"):
return
return LLM._invoke(self, **kwargs)
prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
output_schema = self._get_output_schema()
schema_prompt = ""
if output_schema:
schema = json.dumps(output_schema, ensure_ascii=False, indent=2)
schema_prompt = structured_output_prompt(schema)
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
ex = self.exception_handler()
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]) and not output_schema:
self.set_output("content", partial(self.stream_output_with_tools, prompt, msg, user_defined_prompt))
return
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
use_tools = []
ans = ""
for delta_ans, tk in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
if self.check_if_canceled("Agent processing"):
return
ans += delta_ans
if ans.find("**ERROR**") >= 0:
logging.error(f"Agent._chat got error. response: {ans}")
if self.get_exception_default_value():
self.set_output("content", self.get_exception_default_value())
else:
self.set_output("_ERROR", ans)
return
if output_schema:
error = ""
for _ in range(self._param.max_retries + 1):
try:
def clean_formated_answer(ans: str) -> str:
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
ans = re.sub(r"^.*```json", "", ans, flags=re.DOTALL)
return re.sub(r"```\n*$", "", ans, flags=re.DOTALL)
obj = json_repair.loads(clean_formated_answer(ans))
self.set_output("structured", obj)
if use_tools:
self.set_output("use_tools", use_tools)
return obj
except Exception:
error = "The answer cannot be parsed as JSON"
ans = self._force_format_to_schema(ans, schema_prompt)
if ans.find("**ERROR**") >= 0:
continue
self.set_output("_ERROR", error)
return
self.set_output("content", ans)
if use_tools:
self.set_output("use_tools", use_tools)
return ans
async def _invoke_async(self, **kwargs):
"""
Async entry: reuse existing logic but offload heavy sync parts via async wrappers to reduce blocking.
"""
if self.check_if_canceled("Agent processing"):
return
@ -262,7 +184,7 @@ class Agent(LLM, ToolBase):
if not self.tools:
if self.check_if_canceled("Agent processing"):
return
return await asyncio.to_thread(LLM._invoke, self, **kwargs)
return await LLM._invoke_async(self, **kwargs)
prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
output_schema = self._get_output_schema()
@ -274,13 +196,13 @@ class Agent(LLM, ToolBase):
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
ex = self.exception_handler()
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]) and not output_schema:
self.set_output("content", partial(self.stream_output_with_tools_async, prompt, msg, user_defined_prompt))
self.set_output("content", partial(self.stream_output_with_tools_async, prompt, deepcopy(msg), user_defined_prompt))
return
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
use_tools = []
ans = ""
async for delta_ans, tk in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt, schema_prompt=schema_prompt):
async for delta_ans, _tk in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
if self.check_if_canceled("Agent processing"):
return
ans += delta_ans
@ -308,7 +230,7 @@ class Agent(LLM, ToolBase):
return obj
except Exception:
error = "The answer cannot be parsed as JSON"
ans = self._force_format_to_schema(ans, schema_prompt)
ans = await self._force_format_to_schema_async(ans, schema_prompt)
if ans.find("**ERROR**") >= 0:
continue
@ -320,28 +242,6 @@ class Agent(LLM, ToolBase):
self.set_output("use_tools", use_tools)
return ans
def stream_output_with_tools(self, prompt, msg, user_defined_prompt={}):
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
answer_without_toolcall = ""
use_tools = []
for delta_ans,_ in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
if self.check_if_canceled("Agent streaming"):
return
if delta_ans.find("**ERROR**") >= 0:
if self.get_exception_default_value():
self.set_output("content", self.get_exception_default_value())
yield self.get_exception_default_value()
else:
self.set_output("_ERROR", delta_ans)
return
answer_without_toolcall += delta_ans
yield delta_ans
self.set_output("content", answer_without_toolcall)
if use_tools:
self.set_output("use_tools", use_tools)
async def stream_output_with_tools_async(self, prompt, msg, user_defined_prompt={}):
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
answer_without_toolcall = ""
@ -365,64 +265,22 @@ class Agent(LLM, ToolBase):
self.set_output("use_tools", use_tools)
async def _react_with_tools_streamly_async(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
"""
Async wrapper that offloads synchronous flow to a thread, yielding results without blocking the event loop.
"""
loop = asyncio.get_running_loop()
queue: asyncio.Queue = asyncio.Queue()
def worker():
try:
for delta_ans, tk in self._react_with_tools_streamly(prompt, history, use_tools, user_defined_prompt, schema_prompt=schema_prompt):
asyncio.run_coroutine_threadsafe(queue.put((delta_ans, tk)), loop)
except Exception as e:
asyncio.run_coroutine_threadsafe(queue.put(e), loop)
finally:
asyncio.run_coroutine_threadsafe(queue.put(StopAsyncIteration), loop)
await asyncio.to_thread(worker)
while True:
item = await queue.get()
if item is StopAsyncIteration:
break
if isinstance(item, Exception):
raise item
yield item
def _gen_citations(self, text):
retrievals = self._canvas.get_reference()
retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
for delta_ans in self._generate_streamly([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
{"role": "user", "content": text}
]):
yield delta_ans
def _react_with_tools_streamly(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
token_count = 0
tool_metas = self.tool_meta
hist = deepcopy(history)
last_calling = ""
if len(hist) > 3:
st = timer()
user_request = full_question(messages=history, chat_mdl=self.chat_mdl)
user_request = await asyncio.to_thread(full_question, messages=history, chat_mdl=self.chat_mdl)
self.callback("Multi-turn conversation optimization", {}, user_request, elapsed_time=timer()-st)
else:
user_request = history[-1]["content"]
def use_tool(name, args):
nonlocal hist, use_tools, token_count,last_calling,user_request
async def use_tool_async(name, args):
nonlocal hist, use_tools, last_calling
logging.info(f"{last_calling=} == {name=}")
# Summary of function calling
#if all([
# isinstance(self.toolcall_session.get_tool_obj(name), Agent),
# last_calling,
# last_calling != name
#]):
# self.toolcall_session.get_tool_obj(name).add2system_prompt(f"The chat history with other agents are as following: \n" + self.get_useful_memory(user_request, str(args["user_prompt"]),user_defined_prompt))
last_calling = name
tool_response = self.toolcall_session.tool_call(name, args)
tool_response = await self.toolcall_session.tool_call_async(name, args)
use_tools.append({
"name": name,
"arguments": args,
@ -433,7 +291,7 @@ class Agent(LLM, ToolBase):
return name, tool_response
def complete():
async def complete():
nonlocal hist
need2cite = self._param.cite and self._canvas.get_reference()["chunks"] and self._id.find("-->") < 0
if schema_prompt:
@ -451,7 +309,7 @@ class Agent(LLM, ToolBase):
if len(hist) > 12:
_hist = [hist[0], hist[1], *hist[-10:]]
entire_txt = ""
for delta_ans in self._generate_streamly(_hist):
async for delta_ans in self._generate_streamly_async(_hist):
if not need2cite or cited:
yield delta_ans, 0
entire_txt += delta_ans
@ -460,7 +318,7 @@ class Agent(LLM, ToolBase):
st = timer()
txt = ""
for delta_ans in self._gen_citations(entire_txt):
async for delta_ans in self._gen_citations_async(entire_txt):
if self.check_if_canceled("Agent streaming"):
return
yield delta_ans, 0
@ -475,14 +333,14 @@ class Agent(LLM, ToolBase):
hist.append({"role": "user", "content": content})
st = timer()
task_desc = analyze_task(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
task_desc = await analyze_task_async(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
self.callback("analyze_task", {}, task_desc, elapsed_time=timer()-st)
for _ in range(self._param.max_rounds + 1):
if self.check_if_canceled("Agent streaming"):
return
response, tk = next_step(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
response, tk = await next_step_async(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
# self.callback("next_step", {}, str(response)[:256]+"...")
token_count += tk
token_count += tk or 0
hist.append({"role": "assistant", "content": response})
try:
functions = json_repair.loads(re.sub(r"```.*", "", response))
@ -491,23 +349,24 @@ class Agent(LLM, ToolBase):
for f in functions:
if not isinstance(f, dict):
raise TypeError(f"An object type should be returned, but `{f}`")
with ThreadPoolExecutor(max_workers=5) as executor:
thr = []
for func in functions:
name = func["name"]
args = func["arguments"]
if name == COMPLETE_TASK:
append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
for txt, tkcnt in complete():
yield txt, tkcnt
return
thr.append(executor.submit(use_tool, name, args))
tool_tasks = []
for func in functions:
name = func["name"]
args = func["arguments"]
if name == COMPLETE_TASK:
append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
async for txt, tkcnt in complete():
yield txt, tkcnt
return
st = timer()
reflection = reflect(self.chat_mdl, hist, [th.result() for th in thr], user_defined_prompt)
append_user_content(hist, reflection)
self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
tool_tasks.append(asyncio.create_task(use_tool_async(name, args)))
results = await asyncio.gather(*tool_tasks) if tool_tasks else []
st = timer()
reflection = await reflect_async(self.chat_mdl, hist, results, user_defined_prompt)
append_user_content(hist, reflection)
self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
except Exception as e:
logging.exception(msg=f"Wrong JSON argument format in LLM ReAct response: {e}")
@ -531,21 +390,17 @@ Respond immediately with your final comprehensive answer.
return
append_user_content(hist, final_instruction)
for txt, tkcnt in complete():
async for txt, tkcnt in complete():
yield txt, tkcnt
def get_useful_memory(self, goal: str, sub_goal:str, topn=3, user_defined_prompt:dict={}) -> str:
# self.callback("get_useful_memory", {"topn": 3}, "...")
mems = self._canvas.get_memory()
rank = rank_memories(self.chat_mdl, goal, sub_goal, [summ for (user, assist, summ) in mems], user_defined_prompt)
try:
rank = json_repair.loads(re.sub(r"```.*", "", rank))[:topn]
mems = [mems[r] for r in rank]
return "\n\n".join([f"User: {u}\nAgent: {a}" for u, a,_ in mems])
except Exception as e:
logging.exception(e)
return "Error occurred."
async def _gen_citations_async(self, text):
retrievals = self._canvas.get_reference()
retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
async for delta_ans in self._generate_streamly_async([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
{"role": "user", "content": text}
]):
yield delta_ans
def reset(self, only_output=False):
"""

View file

@ -327,7 +327,7 @@ class LLM(ComponentBase):
self.set_output("content", answer)
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
def _invoke(self, **kwargs):
async def _invoke_async(self, **kwargs):
if self.check_if_canceled("LLM processing"):
return
@ -338,22 +338,25 @@ class LLM(ComponentBase):
prompt, msg, _ = self._prepare_prompt_variables()
error: str = ""
output_structure=None
output_structure = None
try:
output_structure = self._param.outputs['structured']
output_structure = self._param.outputs["structured"]
except Exception:
pass
if output_structure and isinstance(output_structure, dict) and output_structure.get("properties") and len(output_structure["properties"]) > 0:
schema=json.dumps(output_structure, ensure_ascii=False, indent=2)
prompt += structured_output_prompt(schema)
for _ in range(self._param.max_retries+1):
schema = json.dumps(output_structure, ensure_ascii=False, indent=2)
prompt_with_schema = prompt + structured_output_prompt(schema)
for _ in range(self._param.max_retries + 1):
if self.check_if_canceled("LLM processing"):
return
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
_, msg_fit = message_fit_in(
[{"role": "system", "content": prompt_with_schema}, *deepcopy(msg)],
int(self.chat_mdl.max_length * 0.97),
)
error = ""
ans = self._generate(msg)
msg.pop(0)
ans = await self._generate_async(msg_fit)
msg_fit.pop(0)
if ans.find("**ERROR**") >= 0:
logging.error(f"LLM response error: {ans}")
error = ans
@ -362,7 +365,7 @@ class LLM(ComponentBase):
self.set_output("structured", json_repair.loads(clean_formated_answer(ans)))
return
except Exception:
msg.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
msg_fit.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
error = "The answer can't not be parsed as JSON"
if error:
self.set_output("_ERROR", error)
@ -370,18 +373,23 @@ class LLM(ComponentBase):
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
ex = self.exception_handler()
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]):
self.set_output("content", partial(self._stream_output_async, prompt, msg))
if any([self._canvas.get_component_obj(cid).component_name.lower() == "message" for cid in downstreams]) and not (
ex and ex["goto"]
):
self.set_output("content", partial(self._stream_output_async, prompt, deepcopy(msg)))
return
for _ in range(self._param.max_retries+1):
error = ""
for _ in range(self._param.max_retries + 1):
if self.check_if_canceled("LLM processing"):
return
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
_, msg_fit = message_fit_in(
[{"role": "system", "content": prompt}, *deepcopy(msg)], int(self.chat_mdl.max_length * 0.97)
)
error = ""
ans = self._generate(msg)
msg.pop(0)
ans = await self._generate_async(msg_fit)
msg_fit.pop(0)
if ans.find("**ERROR**") >= 0:
logging.error(f"LLM response error: {ans}")
error = ans
@ -395,23 +403,9 @@ class LLM(ComponentBase):
else:
self.set_output("_ERROR", error)
def _stream_output(self, prompt, msg):
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
answer = ""
for ans in self._generate_streamly(msg):
if self.check_if_canceled("LLM streaming"):
return
if ans.find("**ERROR**") >= 0:
if self.get_exception_default_value():
self.set_output("content", self.get_exception_default_value())
yield self.get_exception_default_value()
else:
self.set_output("_ERROR", ans)
return
yield ans
answer += ans
self.set_output("content", answer)
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
def _invoke(self, **kwargs):
return asyncio.run(self._invoke_async(**kwargs))
def add_memory(self, user:str, assist:str, func_name: str, params: dict, results: str, user_defined_prompt:dict={}):
summ = tool_call_summary(self.chat_mdl, func_name, params, results, user_defined_prompt)

View file

@ -49,16 +49,19 @@ class LLMToolPluginCallSession(ToolCallSession):
self.callback = callback
def tool_call(self, name: str, arguments: dict[str, Any]) -> Any:
return asyncio.run(self.tool_call_async(name, arguments))
async def tool_call_async(self, name: str, arguments: dict[str, Any]) -> Any:
assert name in self.tools_map, f"LLM tool {name} does not exist"
st = timer()
tool_obj = self.tools_map[name]
if isinstance(tool_obj, MCPToolCallSession):
resp = tool_obj.tool_call(name, arguments, 60)
resp = await asyncio.to_thread(tool_obj.tool_call, name, arguments, 60)
else:
if hasattr(tool_obj, "invoke_async") and asyncio.iscoroutinefunction(tool_obj.invoke_async):
resp = asyncio.run(tool_obj.invoke_async(**arguments))
resp = await tool_obj.invoke_async(**arguments)
else:
resp = asyncio.run(asyncio.to_thread(tool_obj.invoke, **arguments))
resp = await asyncio.to_thread(tool_obj.invoke, **arguments)
self.callback(name, arguments, resp, elapsed_time=timer()-st)
return resp

View file

@ -33,7 +33,7 @@ from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.tenant_llm_service import TenantLLMService
from api.db.services.task_service import TaskService, queue_tasks
from api.db.services.task_service import TaskService, queue_tasks, cancel_all_task_of
from api.db.services.dialog_service import meta_filter, convert_conditions
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_result, server_error_response, token_required, \
get_request_json
@ -321,9 +321,7 @@ async def update_doc(tenant_id, dataset_id, document_id):
try:
if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
return get_error_data_result(message="Database error (Document update)!")
settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
return get_result(data=True)
except Exception as e:
return server_error_response(e)
@ -350,12 +348,10 @@ async def update_doc(tenant_id, dataset_id, document_id):
}
renamed_doc = {}
for key, value in doc.to_dict().items():
if key == "run":
renamed_doc["run"] = run_mapping.get(str(value))
new_key = key_mapping.get(key, key)
renamed_doc[new_key] = value
if key == "run":
renamed_doc["run"] = run_mapping.get(value)
renamed_doc["run"] = run_mapping.get(str(value))
return get_result(data=renamed_doc)
@ -839,6 +835,8 @@ async def stop_parsing(tenant_id, dataset_id):
return get_error_data_result(message=f"You don't own the document {id}.")
if int(doc[0].progress) == 1 or doc[0].progress == 0:
return get_error_data_result("Can't stop parsing document with progress at 0 or 1")
# Send cancellation signal via Redis to stop background task
cancel_all_task_of(id)
info = {"run": "2", "progress": 0, "chunk_num": 0}
DocumentService.update_by_id(id, info)
settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)

View file

@ -148,6 +148,7 @@ class Storage(Enum):
AWS_S3 = 4
OSS = 5
OPENDAL = 6
GCS = 7
# environment
# ENV_STRONG_TEST_COUNT = "STRONG_TEST_COUNT"

View file

@ -31,6 +31,7 @@ import rag.utils.ob_conn
import rag.utils.opensearch_conn
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
from rag.utils.gcs_conn import RAGFlowGCS
from rag.utils.minio_conn import RAGFlowMinio
from rag.utils.opendal_conn import OpenDALStorage
from rag.utils.s3_conn import RAGFlowS3
@ -109,6 +110,7 @@ MINIO = {}
OB = {}
OSS = {}
OS = {}
GCS = {}
DOC_MAXIMUM_SIZE: int = 128 * 1024 * 1024
DOC_BULK_SIZE: int = 4
@ -151,7 +153,8 @@ class StorageFactory:
Storage.AZURE_SAS: RAGFlowAzureSasBlob,
Storage.AWS_S3: RAGFlowS3,
Storage.OSS: RAGFlowOSS,
Storage.OPENDAL: OpenDALStorage
Storage.OPENDAL: OpenDALStorage,
Storage.GCS: RAGFlowGCS,
}
@classmethod
@ -250,7 +253,7 @@ def init_settings():
else:
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
global AZURE, S3, MINIO, OSS
global AZURE, S3, MINIO, OSS, GCS
if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
AZURE = get_base_config("azure", {})
elif STORAGE_IMPL_TYPE == 'AWS_S3':
@ -259,6 +262,8 @@ def init_settings():
MINIO = decrypt_database_config(name="minio")
elif STORAGE_IMPL_TYPE == 'OSS':
OSS = get_base_config("oss", {})
elif STORAGE_IMPL_TYPE == 'GCS':
GCS = get_base_config("gcs", {})
global STORAGE_IMPL
STORAGE_IMPL = StorageFactory.create(Storage[STORAGE_IMPL_TYPE])

View file

@ -60,6 +60,8 @@ user_default_llm:
# access_key: 'access_key'
# secret_key: 'secret_key'
# region: 'region'
#gcs:
# bucket: 'bridgtl-edm-d-bucket-ragflow'
# oss:
# access_key: 'access_key'
# secret_key: 'secret_key'

View file

@ -25,6 +25,8 @@ from rag.prompts.generator import vision_llm_figure_describe_prompt
def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
if not figures_data_without_positions:
return []
return [
(
(figure_data[1], [figure_data[0]]),
@ -35,7 +37,9 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
]
def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
def vision_figure_parser_docx_wrapper(sections, tbls, callback=None,**kwargs):
if not tbls:
return []
try:
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
@ -53,6 +57,8 @@ def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
def vision_figure_parser_pdf_wrapper(tbls, callback=None, **kwargs):
if not tbls:
return []
try:
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")

View file

@ -23,7 +23,7 @@ services:
env_file: .env
networks:
- ragflow
restart: on-failure
restart: unless-stopped
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
extra_hosts:
@ -48,7 +48,7 @@ services:
env_file: .env
networks:
- ragflow
restart: on-failure
restart: unless-stopped
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
extra_hosts:

View file

@ -31,7 +31,7 @@ services:
retries: 120
networks:
- ragflow
restart: on-failure
restart: unless-stopped
opensearch01:
profiles:
@ -67,12 +67,12 @@ services:
retries: 120
networks:
- ragflow
restart: on-failure
restart: unless-stopped
infinity:
profiles:
- infinity
image: infiniflow/infinity:v0.6.8
image: infiniflow/infinity:v0.6.10
volumes:
- infinity_data:/var/infinity
- ./infinity_conf.toml:/infinity_conf.toml
@ -94,7 +94,7 @@ services:
interval: 10s
timeout: 10s
retries: 120
restart: on-failure
restart: unless-stopped
oceanbase:
profiles:
@ -119,7 +119,7 @@ services:
timeout: 10s
networks:
- ragflow
restart: on-failure
restart: unless-stopped
sandbox-executor-manager:
profiles:
@ -147,7 +147,7 @@ services:
interval: 10s
timeout: 10s
retries: 120
restart: on-failure
restart: unless-stopped
mysql:
# mysql:5.7 linux/arm64 image is unavailable.
@ -175,7 +175,7 @@ services:
interval: 10s
timeout: 10s
retries: 120
restart: on-failure
restart: unless-stopped
minio:
image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
@ -191,7 +191,7 @@ services:
- minio_data:/data
networks:
- ragflow
restart: on-failure
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 10s
@ -209,7 +209,7 @@ services:
- redis_data:/data
networks:
- ragflow
restart: on-failure
restart: unless-stopped
healthcheck:
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
interval: 10s
@ -228,7 +228,7 @@ services:
networks:
- ragflow
command: ["--model-id", "/data/${TEI_MODEL}", "--auto-truncate"]
restart: on-failure
restart: unless-stopped
tei-gpu:
@ -249,7 +249,7 @@ services:
- driver: nvidia
count: all
capabilities: [gpu]
restart: on-failure
restart: unless-stopped
kibana:
@ -271,7 +271,7 @@ services:
retries: 120
networks:
- ragflow
restart: on-failure
restart: unless-stopped
volumes:

View file

@ -22,7 +22,7 @@ services:
env_file: .env
networks:
- ragflow
restart: on-failure
restart: unless-stopped
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
extra_hosts:
@ -39,7 +39,7 @@ services:
# entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
# networks:
# - ragflow
# restart: on-failure
# restart: unless-stopped
# # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
# extra_hosts:

View file

@ -45,7 +45,7 @@ services:
env_file: .env
networks:
- ragflow
restart: on-failure
restart: unless-stopped
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
extra_hosts:
@ -94,7 +94,7 @@ services:
env_file: .env
networks:
- ragflow
restart: on-failure
restart: unless-stopped
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
extra_hosts:
@ -120,7 +120,7 @@ services:
# entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
# networks:
# - ragflow
# restart: on-failure
# restart: unless-stopped
# # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
# extra_hosts:

View file

@ -1,5 +1,5 @@
[general]
version = "0.6.8"
version = "0.6.10"
time_zone = "utc-8"
[network]

View file

@ -512,13 +512,16 @@ curl --request POST \
- Maximum: `2048`
- `"delimiter"`: `string`
- Defaults to `"\n"`.
- `"html4excel"`: `bool` Indicates whether to convert Excel documents into HTML format.
- `"html4excel"`: `bool`
- Whether to convert Excel documents into HTML format.
- Defaults to `false`
- `"layout_recognize"`: `string`
- Defaults to `DeepDOC`
- `"tag_kb_ids"`: `array<string>` refer to [Use tag set](https://ragflow.io/docs/dev/use_tag_sets)
- Must include a list of dataset IDs, where each dataset is parsed using the Tag Chunking Method
- `"task_page_size"`: `int` For PDF only.
- `"tag_kb_ids"`: `array<string>`
- IDs of datasets to be parsed using the Tag chunk method.
- Before setting this, ensure a tag set is created and properly configured. For details, see [Use tag set](https://ragflow.io/docs/dev/use_tag_sets).
- `"task_page_size"`: `int`
- For PDFs only.
- Defaults to `12`
- Minimum: `1`
- `"raptor"`: `object` RAPTOR-specific settings.

View file

@ -43,7 +43,6 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
repos = [
"InfiniFlow/text_concat_xgb_v1.0",
"InfiniFlow/deepdoc",
"InfiniFlow/huqie",
]

View file

@ -96,7 +96,7 @@ ragflow:
infinity:
image:
repository: infiniflow/infinity
tag: v0.6.8
tag: v0.6.10
pullPolicy: IfNotPresent
pullSecrets: []
storage:

View file

@ -49,7 +49,7 @@ dependencies = [
"html-text==0.6.2",
"httpx[socks]>=0.28.1,<0.29.0",
"huggingface-hub>=0.25.0,<0.26.0",
"infinity-sdk==0.6.8",
"infinity-sdk==0.6.10",
"infinity-emb>=0.0.66,<0.0.67",
"itsdangerous==2.1.2",
"json-repair==0.35.0",

View file

@ -86,9 +86,11 @@ class Pdf(PdfParser):
# (A) Add text
for b in self.boxes:
if not (from_page < b["page_number"] <= to_page + from_page):
# b["page_number"] is relative to from_page; add from_page to get the global page number
global_page_num = b["page_number"] + from_page
if not (from_page < global_page_num <= to_page + from_page):
continue
page_items[b["page_number"]].append({
page_items[global_page_num].append({
"top": b["top"],
"x0": b["x0"],
"text": b["text"],
@ -100,7 +102,6 @@ class Pdf(PdfParser):
if not positions:
continue
# Handle content type (list vs str)
if isinstance(content, list):
final_text = "\n".join(content)
elif isinstance(content, str):
@ -109,10 +110,11 @@ class Pdf(PdfParser):
final_text = str(content)
try:
# Parse positions
pn_index = positions[0][0]
if isinstance(pn_index, list):
pn_index = pn_index[0]
# pn_index in tbls is absolute page number
current_page_num = int(pn_index) + 1
except Exception as e:
print(f"Error parsing position: {e}")

View file

@ -343,7 +343,8 @@ def form_history(history, limit=-6):
return context
def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
tools_desc = tool_schema(tools_description)
context = ""
@ -352,7 +353,7 @@ def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], use
else:
template = PROMPT_JINJA_ENV.from_string(ANALYZE_TASK_SYSTEM + "\n\n" + ANALYZE_TASK_USER)
context = template.render(task=task_name, context=context, agent_prompt=prompt, tools_desc=tools_desc)
kwd = chat_mdl.chat(context, [{"role": "user", "content": "Please analyze it."}])
kwd = await _chat_async(chat_mdl, context, [{"role": "user", "content": "Please analyze it."}])
if isinstance(kwd, tuple):
kwd = kwd[0]
kwd = re.sub(r"^.*</think>", "", kwd, flags=re.DOTALL)
@ -361,13 +362,17 @@ def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], use
return kwd
async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
return await asyncio.to_thread(analyze_task, chat_mdl, prompt, task_name, tools_description, user_defined_prompts)
async def _chat_async(chat_mdl, system: str, history: list, **kwargs):
chat_async = getattr(chat_mdl, "async_chat", None)
if chat_async and asyncio.iscoroutinefunction(chat_async):
return await chat_async(system, history, **kwargs)
return await asyncio.to_thread(chat_mdl.chat, system, history, **kwargs)
def next_step(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
async def next_step_async(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
if not tools_description:
return ""
return "", 0
desc = tool_schema(tools_description)
template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("plan_generation", NEXT_STEP))
user_prompt = "\nWhat's the next tool to call? If ready OR IMPOSSIBLE TO BE READY, then call `complete_task`."
@ -376,18 +381,18 @@ def next_step(chat_mdl, history:list, tools_description: list[dict], task_desc,
hist[-1]["content"] += user_prompt
else:
hist.append({"role": "user", "content": user_prompt})
json_str = chat_mdl.chat(template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
hist[1:], stop=["<|stop|>"])
json_str = await _chat_async(
chat_mdl,
template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
hist[1:],
stop=["<|stop|>"],
)
tk_cnt = num_tokens_from_string(json_str)
json_str = re.sub(r"^.*</think>", "", json_str, flags=re.DOTALL)
return json_str, tk_cnt
async def next_step_async(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
return await asyncio.to_thread(next_step, chat_mdl, history, tools_description, task_desc, user_defined_prompts)
def reflect(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
tool_calls = [{"name": p[0], "result": p[1]} for p in tool_call_res]
goal = history[1]["content"]
template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("reflection", REFLECT))
@ -398,7 +403,7 @@ def reflect(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defi
else:
hist.append({"role": "user", "content": user_prompt})
_, msg = message_fit_in(hist, chat_mdl.max_length)
ans = chat_mdl.chat(msg[0]["content"], msg[1:])
ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:])
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
return """
**Observation**
@ -429,23 +434,15 @@ def tool_call_summary(chat_mdl, name: str, params: dict, result: str, user_defin
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
def rank_memories(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
async def rank_memories_async(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
template = PROMPT_JINJA_ENV.from_string(RANK_MEMORY)
system_prompt = template.render(goal=goal, sub_goal=sub_goal, results=[{"i": i, "content": s} for i,s in enumerate(tool_call_summaries)])
user_prompt = " → rank: "
_, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
ans = chat_mdl.chat(msg[0]["content"], msg[1:], stop="<|stop|>")
ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:], stop="<|stop|>")
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
return await asyncio.to_thread(reflect, chat_mdl, history, tool_call_res, user_defined_prompts)
async def rank_memories_async(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
return await asyncio.to_thread(rank_memories, chat_mdl, goal, sub_goal, tool_call_summaries, user_defined_prompts)
def gen_meta_filter(chat_mdl, meta_data:dict, query: str) -> dict:
meta_data_structure = {}
for key, values in meta_data.items():

File diff suppressed because it is too large Load diff

View file

@ -157,11 +157,30 @@ class Confluence(SyncBase):
from common.data_source.config import DocumentSource
from common.data_source.interfaces import StaticCredentialsProvider
index_mode = (self.conf.get("index_mode") or "everything").lower()
if index_mode not in {"everything", "space", "page"}:
index_mode = "everything"
space = ""
page_id = ""
index_recursively = False
if index_mode == "space":
space = (self.conf.get("space") or "").strip()
if not space:
raise ValueError("Space Key is required when indexing a specific Confluence space.")
elif index_mode == "page":
page_id = (self.conf.get("page_id") or "").strip()
if not page_id:
raise ValueError("Page ID is required when indexing a specific Confluence page.")
index_recursively = bool(self.conf.get("index_recursively", False))
self.connector = ConfluenceConnector(
wiki_base=self.conf["wiki_base"],
space=self.conf.get("space", ""),
is_cloud=self.conf.get("is_cloud", True),
# page_id=self.conf.get("page_id", ""),
space=space,
page_id=page_id,
index_recursively=index_recursively,
)
credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"], connector_name=DocumentSource.CONFLUENCE, credential_json=self.conf["credentials"])

207
rag/utils/gcs_conn.py Normal file
View file

@ -0,0 +1,207 @@
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import time
import datetime
from io import BytesIO
from google.cloud import storage
from google.api_core.exceptions import NotFound
from common.decorator import singleton
from common import settings
@singleton
class RAGFlowGCS:
    """Object-storage adapter that keeps everything in one GCS bucket.

    The real bucket name is read from ``settings.GCS["bucket"]``. The
    ``bucket`` argument taken by the public methods is used as a folder
    prefix inside that bucket, so objects live at ``<bucket>/<filename>``.
    ``tenant_id`` parameters are accepted but not used by this class.
    """

    def __init__(self):
        self.client = None
        self.bucket_name = None
        self.__open__()

    def __open__(self):
        """(Re)create the storage client and reload the bucket name.

        Errors are logged rather than raised; if the client cannot be
        created, ``self.client`` is left as ``None``.
        """
        # Drop the previous handle first so a failed reconnect never leaves
        # a stale client behind.
        self.client = None
        try:
            self.client = storage.Client()
            self.bucket_name = settings.GCS["bucket"]
        except Exception:
            logging.exception("Fail to connect to GCS")

    def _get_blob_path(self, folder, filename):
        """Helper to construct the path: folder/filename"""
        if not folder:
            return filename
        return f"{folder}/{filename}"

    def _blob(self, folder, filename):
        """Return a blob handle for ``folder/filename`` in the configured bucket."""
        bucket_obj = self.client.bucket(self.bucket_name)
        return bucket_obj.blob(self._get_blob_path(folder, filename))

    def health(self):
        """Check that the main bucket exists and accepts a small upload.

        Returns:
            True when the probe object was uploaded, False otherwise.
        """
        folder, fnm, binary = "ragflow-health", "health_check", b"_t@@@1"
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            if not bucket_obj.exists():
                logging.error("Health check failed: Main bucket '%s' does not exist.", self.bucket_name)
                return False

            blob = bucket_obj.blob(self._get_blob_path(folder, fnm))
            blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
            return True
        except Exception as e:
            logging.exception("Health check failed: %s", e)
            return False

    def put(self, bucket, fnm, binary, tenant_id=None):
        """Upload ``binary`` to ``bucket/fnm``, trying up to three times.

        Returns:
            True on success; False if the main bucket is missing or every
            attempt failed.
        """
        attempts = 3
        for attempt in range(attempts):
            try:
                blob = self._blob(bucket, fnm)
                blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
                return True
            except NotFound:
                logging.error("Fail to put: Main bucket %s does not exist.", self.bucket_name)
                return False
            except Exception:
                logging.exception("Fail to put %s/%s:", bucket, fnm)
                self.__open__()
                # Only pause when another attempt will actually follow.
                if attempt < attempts - 1:
                    time.sleep(1)
        return False

    def rm(self, bucket, fnm, tenant_id=None):
        """Delete ``bucket/fnm``; a missing object is silently ignored."""
        try:
            self._blob(bucket, fnm).delete()
        except NotFound:
            pass
        except Exception:
            logging.exception("Fail to remove %s/%s:", bucket, fnm)

    def get(self, bucket, filename, tenant_id=None):
        """Download ``bucket/filename``.

        Returns:
            The object's bytes, or None when it does not exist or the
            download failed.
        """
        try:
            return self._blob(bucket, filename).download_as_bytes()
        except NotFound:
            logging.warning("File not found %s/%s in %s", bucket, filename, self.bucket_name)
            return None
        except Exception:
            logging.exception("Fail to get %s/%s", bucket, filename)
            # Refresh the client for the next caller; there is no retry here,
            # so no back-off sleep is needed.
            self.__open__()
        return None

    def obj_exist(self, bucket, filename, tenant_id=None):
        """Return whether ``bucket/filename`` exists; False on any error."""
        try:
            return self._blob(bucket, filename).exists()
        except Exception:
            logging.exception("obj_exist %s/%s got exception", bucket, filename)
            return False

    def bucket_exists(self, bucket):
        """Return whether the configured main bucket exists; False on any error.

        NOTE(review): ``bucket`` is not consulted here - the check is made
        against ``self.bucket_name`` only. Confirm this is intended for the
        folder-prefix layout.
        """
        try:
            return self.client.bucket(self.bucket_name).exists()
        except Exception:
            logging.exception("bucket_exist check for %s got exception", self.bucket_name)
            return False

    def get_presigned_url(self, bucket, fnm, expires, tenant_id=None):
        """Create a V4 signed GET URL for ``bucket/fnm``, trying up to ten times.

        Args:
            expires: Passed to ``generate_signed_url`` as ``expiration``; an
                ``int`` is first converted to a ``timedelta`` of that many
                seconds.

        Returns:
            The signed URL, or None if every attempt failed.
        """
        attempts = 10
        for attempt in range(attempts):
            try:
                blob = self._blob(bucket, fnm)

                expiration = expires
                if isinstance(expires, int):
                    expiration = datetime.timedelta(seconds=expires)

                return blob.generate_signed_url(
                    version="v4",
                    expiration=expiration,
                    method="GET",
                )
            except Exception:
                logging.exception("Fail to get_presigned %s/%s:", bucket, fnm)
                self.__open__()
                # Only pause when another attempt will actually follow.
                if attempt < attempts - 1:
                    time.sleep(1)
        return None

    def remove_bucket(self, bucket):
        """Delete every object whose name starts with ``<bucket>/``."""
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            prefix = f"{bucket}/"

            blobs = list(self.client.list_blobs(self.bucket_name, prefix=prefix))
            if blobs:
                bucket_obj.delete_blobs(blobs)
        except Exception:
            logging.exception("Fail to remove virtual bucket (folder) %s", bucket)

    def copy(self, src_bucket, src_path, dest_bucket, dest_path):
        """Copy ``src_bucket/src_path`` to ``dest_bucket/dest_path``.

        Returns:
            True when the copy was issued; False if the source is missing,
            the main bucket is missing, or any other error occurred.
        """
        try:
            bucket_obj = self.client.bucket(self.bucket_name)

            src_blob_path = self._get_blob_path(src_bucket, src_path)
            dest_blob_path = self._get_blob_path(dest_bucket, dest_path)

            src_blob = bucket_obj.blob(src_blob_path)
            if not src_blob.exists():
                logging.error("Source object not found: %s", src_blob_path)
                return False

            bucket_obj.copy_blob(src_blob, bucket_obj, dest_blob_path)
            return True
        except NotFound:
            logging.error("Copy failed: Main bucket %s does not exist.", self.bucket_name)
            return False
        except Exception:
            logging.exception("Fail to copy %s/%s -> %s/%s", src_bucket, src_path, dest_bucket, dest_path)
            return False

    def move(self, src_bucket, src_path, dest_bucket, dest_path):
        """Copy the object to its destination, then remove the source.

        Returns:
            True when the copy succeeded (the source removal is best-effort,
            since ``rm`` logs its own errors); False otherwise.
        """
        try:
            if self.copy(src_bucket, src_path, dest_bucket, dest_path):
                self.rm(src_bucket, src_path)
                return True
            logging.error("Copy failed, move aborted: %s/%s", src_bucket, src_path)
            return False
        except Exception:
            logging.exception("Fail to move %s/%s -> %s/%s", src_bucket, src_path, dest_bucket, dest_path)
            return False

View file

@ -1,323 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Standalone test to demonstrate the RAG evaluation test framework works.
This test doesn't require RAGFlow dependencies.
"""
import pytest
from unittest.mock import Mock
class TestEvaluationFrameworkDemo:
    """Smoke tests showing the evaluation test harness itself runs."""

    def test_basic_assertion(self):
        """Plain arithmetic assertion."""
        total = 1 + 1
        assert total == 2

    def test_mock_evaluation_service(self):
        """A mocked ``create_dataset`` returns the configured pair and records the call."""
        service = Mock()
        service.create_dataset.return_value = (True, "dataset_123")

        created, new_id = service.create_dataset(name="Test Dataset", kb_ids=["kb_1"])

        assert created is True
        assert new_id == "dataset_123"
        service.create_dataset.assert_called_once()

    def test_mock_test_case_addition(self):
        """A mocked ``add_test_case`` returns the configured pair."""
        service = Mock()
        service.add_test_case.return_value = (True, "case_123")

        call_kwargs = {
            "dataset_id": "dataset_123",
            "question": "Test question?",
            "reference_answer": "Test answer",
        }
        added, new_case_id = service.add_test_case(**call_kwargs)

        assert added is True
        assert new_case_id == "case_123"

    def test_mock_evaluation_run(self):
        """A mocked ``start_evaluation`` returns the configured pair."""
        service = Mock()
        service.start_evaluation.return_value = (True, "run_123")

        call_kwargs = {
            "dataset_id": "dataset_123",
            "dialog_id": "dialog_456",
            "user_id": "user_1",
        }
        started, new_run_id = service.start_evaluation(**call_kwargs)

        assert started is True
        assert new_run_id == "run_123"

    def test_mock_metrics_computation(self):
        """A mocked ``_compute_retrieval_metrics`` returns the canned metrics dict."""
        service = Mock()
        canned = {
            "precision": 0.85,
            "recall": 0.78,
            "f1_score": 0.81,
            "hit_rate": 1.0,
            "mrr": 0.9,
        }
        service._compute_retrieval_metrics.return_value = canned

        computed = service._compute_retrieval_metrics(
            retrieved_ids=["chunk_1", "chunk_2", "chunk_3"],
            relevant_ids=["chunk_1", "chunk_2", "chunk_4"],
        )

        assert computed["precision"] == 0.85
        assert computed["recall"] == 0.78
        assert computed["f1_score"] == 0.81

    def test_mock_recommendations(self):
        """A mocked ``get_recommendations`` returns the canned list."""
        service = Mock()
        low_precision = {
            "issue": "Low Precision",
            "severity": "high",
            "suggestions": [
                "Increase similarity_threshold",
                "Enable reranking",
            ],
        }
        service.get_recommendations.return_value = [low_precision]

        received = service.get_recommendations("run_123")

        assert len(received) == 1
        first = received[0]
        assert first["issue"] == "Low Precision"
        assert len(first["suggestions"]) == 2

    @pytest.mark.parametrize("precision,recall,expected_f1", [
        (1.0, 1.0, 1.0),
        (0.8, 0.6, 0.69),
        (0.5, 0.5, 0.5),
        (0.0, 0.0, 0.0),
    ])
    def test_f1_score_calculation(self, precision, recall, expected_f1):
        """F1 is the harmonic mean of precision and recall, or 0.0 when both are zero."""
        denominator = precision + recall
        f1 = 2 * (precision * recall) / denominator if denominator > 0 else 0.0

        assert abs(f1 - expected_f1) < 0.01

    def test_dataset_list_structure(self):
        """A mocked ``list_datasets`` returns a dict with ``total`` and ``datasets``."""
        service = Mock()
        service.list_datasets.return_value = {
            "total": 3,
            "datasets": [
                {"id": "dataset_1", "name": "Dataset 1"},
                {"id": "dataset_2", "name": "Dataset 2"},
                {"id": "dataset_3", "name": "Dataset 3"},
            ],
        }

        listing = service.list_datasets(
            tenant_id="tenant_1",
            user_id="user_1",
            page=1,
            page_size=10,
        )

        assert listing["total"] == 3
        assert len(listing["datasets"]) == 3
        assert listing["datasets"][0]["id"] == "dataset_1"

    def test_evaluation_run_status_flow(self):
        """Each status configured on the mock is what ``get_run_results`` reports."""
        service = Mock()

        # Walk through the status progression one step at a time.
        for expected_status in ("PENDING", "RUNNING", "COMPLETED"):
            service.get_run_results.return_value = {
                "run": {"id": "run_123", "status": expected_status},
            }

            snapshot = service.get_run_results("run_123")
            assert snapshot["run"]["status"] == expected_status

    def test_bulk_import_success_count(self):
        """A mocked ``import_test_cases`` returns the configured (success, failure) counts."""
        service = Mock()
        # Pretend 8 imports succeeded and 2 failed.
        service.import_test_cases.return_value = (8, 2)

        payload = [{"question": f"Q{i}"} for i in range(10)]
        imported, rejected = service.import_test_cases(
            dataset_id="dataset_123",
            cases=payload,
        )

        assert imported == 8
        assert rejected == 2
        assert imported + rejected == 10

    def test_metrics_summary_aggregation(self):
        """Averages over three result rows land within 0.01 of the expected values."""
        rows = [
            {"metrics": {"precision": 0.9, "recall": 0.8}, "execution_time": 1.2},
            {"metrics": {"precision": 0.8, "recall": 0.7}, "execution_time": 1.5},
            {"metrics": {"precision": 0.85, "recall": 0.75}, "execution_time": 1.3},
        ]
        count = len(rows)

        mean_precision = sum(row["metrics"]["precision"] for row in rows) / count
        mean_recall = sum(row["metrics"]["recall"] for row in rows) / count
        mean_time = sum(row["execution_time"] for row in rows) / count

        assert abs(mean_precision - 0.85) < 0.01
        assert abs(mean_recall - 0.75) < 0.01
        assert abs(mean_time - 1.33) < 0.01

    def test_recommendation_severity_levels(self):
        """Every recommendation built from a known severity carries a known severity."""
        known_levels = ["low", "medium", "high", "critical"]

        for level in known_levels:
            recommendation = {
                "issue": "Test Issue",
                "severity": level,
                "suggestions": ["Fix it"],
            }
            assert recommendation["severity"] in known_levels

    def test_empty_dataset_handling(self):
        """A mocked ``get_test_cases`` configured with ``[]`` returns an empty list."""
        service = Mock()
        service.get_test_cases.return_value = []

        found = service.get_test_cases("empty_dataset")

        assert len(found) == 0
        assert isinstance(found, list)

    def test_error_handling(self):
        """A mocked failure result carries a message mentioning "empty"."""
        service = Mock()
        service.create_dataset.return_value = (False, "Dataset name cannot be empty")

        created, message = service.create_dataset(name="", kb_ids=[])

        assert created is False
        assert "empty" in message.lower()

    def test_pagination_logic(self):
        """Page 2 of 50 items at 10 per page covers indices 10 to 20."""
        total_items, page_size, page = 50, 10, 2

        # Slice bounds for the requested page.
        first_index = (page - 1) * page_size
        last_index = min(first_index + page_size, total_items)

        assert last_index - first_index == 10
        assert first_index == 10
        assert last_index == 20
class TestMetricsCalculations:
    """Checks of the retrieval-metric formulas on small hand-built inputs."""

    def test_precision_calculation(self):
        """Precision is hits divided by the number of retrieved chunks."""
        retrieved = {"chunk_1", "chunk_2", "chunk_3", "chunk_4"}
        relevant = {"chunk_1", "chunk_2", "chunk_5"}

        hits = retrieved.intersection(relevant)
        precision = len(hits) / len(retrieved)

        assert precision == 0.5  # 2 of the 4 retrieved chunks are relevant

    def test_recall_calculation(self):
        """Recall is hits divided by the number of relevant chunks."""
        retrieved = {"chunk_1", "chunk_2", "chunk_3", "chunk_4"}
        relevant = {"chunk_1", "chunk_2", "chunk_5"}

        hits = retrieved.intersection(relevant)
        recall = len(hits) / len(relevant)

        assert abs(recall - 0.67) < 0.01  # 2 of the 3 relevant chunks were retrieved

    def test_hit_rate_positive(self):
        """Hit rate is 1.0 when the two sets overlap."""
        retrieved = {"chunk_1", "chunk_2", "chunk_3"}
        relevant = {"chunk_2", "chunk_4"}

        if retrieved & relevant:
            hit_rate = 1.0
        else:
            hit_rate = 0.0

        assert hit_rate == 1.0

    def test_hit_rate_negative(self):
        """Hit rate is 0.0 when the two sets are disjoint."""
        retrieved = {"chunk_1", "chunk_2", "chunk_3"}
        relevant = {"chunk_4", "chunk_5"}

        if retrieved & relevant:
            hit_rate = 1.0
        else:
            hit_rate = 0.0

        assert hit_rate == 0.0

    def test_mrr_calculation(self):
        """MRR is the reciprocal of the 1-based rank of the first relevant chunk."""
        ranked_ids = ["chunk_1", "chunk_2", "chunk_3", "chunk_4"]
        relevant_ids = {"chunk_3", "chunk_5"}

        reciprocal_ranks = (
            1.0 / rank
            for rank, chunk_id in enumerate(ranked_ids, 1)
            if chunk_id in relevant_ids
        )
        mrr = next(reciprocal_ranks, 0.0)

        assert abs(mrr - 0.33) < 0.01  # first relevant chunk sits at rank 3
# Summary test
def test_evaluation_framework_summary():
    """Placeholder summary test that always passes.

    It makes an unconditional assertion; it does not itself exercise the
    assertion, mocking, metrics, error-handling or pagination checks that
    the other tests in this module cover.
    """
    framework_ok = True
    assert framework_ok, "Evaluation test framework is working correctly!"
# Allow running this file directly: hand it to pytest in verbose mode.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])

View file

@ -1,557 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Unit tests for RAG Evaluation Service
Tests cover:
- Dataset management (CRUD operations)
- Test case management
- Evaluation execution
- Metrics computation
- Recommendations generation
"""
import pytest
from unittest.mock import patch
class TestEvaluationDatasetManagement:
    """Dataset CRUD tests run against a patched ``EvaluationService``."""

    @pytest.fixture
    def mock_evaluation_service(self):
        """Patch ``EvaluationService`` for the test's duration and yield the mock."""
        with patch('api.db.services.evaluation_service.EvaluationService') as patched_service:
            yield patched_service

    @pytest.fixture
    def sample_dataset_data(self):
        """Keyword arguments describing a valid dataset."""
        payload = {
            "name": "Customer Support QA",
            "description": "Test cases for customer support",
            "kb_ids": ["kb_123", "kb_456"],
            "tenant_id": "tenant_1",
            "user_id": "user_1",
        }
        return payload

    def test_create_dataset_success(self, mock_evaluation_service, sample_dataset_data):
        """``create_dataset`` returns the configured success pair and is called once."""
        mock_evaluation_service.create_dataset.return_value = (True, "dataset_123")

        created, new_id = mock_evaluation_service.create_dataset(**sample_dataset_data)

        assert created is True
        assert new_id == "dataset_123"
        mock_evaluation_service.create_dataset.assert_called_once()

    def test_create_dataset_with_empty_name(self, mock_evaluation_service):
        """An empty name yields the configured failure message."""
        payload = {
            "name": "",
            "description": "Test",
            "kb_ids": ["kb_123"],
            "tenant_id": "tenant_1",
            "user_id": "user_1",
        }
        mock_evaluation_service.create_dataset.return_value = (False, "Dataset name cannot be empty")

        created, message = mock_evaluation_service.create_dataset(**payload)

        assert created is False
        lowered = message.lower()
        assert "name" in lowered or "empty" in lowered

    def test_create_dataset_with_empty_kb_ids(self, mock_evaluation_service):
        """An empty ``kb_ids`` list yields the configured failure flag."""
        payload = {
            "name": "Test Dataset",
            "description": "Test",
            "kb_ids": [],
            "tenant_id": "tenant_1",
            "user_id": "user_1",
        }
        mock_evaluation_service.create_dataset.return_value = (False, "kb_ids cannot be empty")

        created, _message = mock_evaluation_service.create_dataset(**payload)

        assert created is False

    def test_get_dataset_success(self, mock_evaluation_service):
        """``get_dataset`` returns the configured dataset dict."""
        mock_evaluation_service.get_dataset.return_value = {
            "id": "dataset_123",
            "name": "Test Dataset",
            "kb_ids": ["kb_123"],
        }

        fetched = mock_evaluation_service.get_dataset("dataset_123")

        assert fetched is not None
        assert fetched["id"] == "dataset_123"

    def test_get_dataset_not_found(self, mock_evaluation_service):
        """``get_dataset`` returns None when configured to."""
        mock_evaluation_service.get_dataset.return_value = None

        fetched = mock_evaluation_service.get_dataset("nonexistent")

        assert fetched is None

    def test_list_datasets(self, mock_evaluation_service):
        """``list_datasets`` returns the configured two-item listing."""
        mock_evaluation_service.list_datasets.return_value = {
            "total": 2,
            "datasets": [
                {"id": "dataset_1", "name": "Dataset 1"},
                {"id": "dataset_2", "name": "Dataset 2"},
            ],
        }

        listing = mock_evaluation_service.list_datasets(
            tenant_id="tenant_1",
            user_id="user_1",
            page=1,
            page_size=20,
        )

        assert listing["total"] == 2
        assert len(listing["datasets"]) == 2

    def test_list_datasets_with_pagination(self, mock_evaluation_service):
        """A paged listing reports the overall total and one page of rows."""
        page_rows = [{"id": f"dataset_{i}"} for i in range(10)]
        mock_evaluation_service.list_datasets.return_value = {
            "total": 50,
            "datasets": page_rows,
        }

        listing = mock_evaluation_service.list_datasets(
            tenant_id="tenant_1",
            user_id="user_1",
            page=2,
            page_size=10,
        )

        assert listing["total"] == 50
        assert len(listing["datasets"]) == 10

    def test_update_dataset_success(self, mock_evaluation_service):
        """``update_dataset`` returns True when configured to."""
        mock_evaluation_service.update_dataset.return_value = True

        updated = mock_evaluation_service.update_dataset(
            "dataset_123",
            name="Updated Name",
            description="Updated Description",
        )

        assert updated is True

    def test_update_dataset_not_found(self, mock_evaluation_service):
        """``update_dataset`` returns False when configured to."""
        mock_evaluation_service.update_dataset.return_value = False

        updated = mock_evaluation_service.update_dataset("nonexistent", name="Updated Name")

        assert updated is False

    def test_delete_dataset_success(self, mock_evaluation_service):
        """``delete_dataset`` returns True when configured to."""
        mock_evaluation_service.delete_dataset.return_value = True

        deleted = mock_evaluation_service.delete_dataset("dataset_123")

        assert deleted is True

    def test_delete_dataset_not_found(self, mock_evaluation_service):
        """``delete_dataset`` returns False when configured to."""
        mock_evaluation_service.delete_dataset.return_value = False

        deleted = mock_evaluation_service.delete_dataset("nonexistent")

        assert deleted is False
class TestEvaluationTestCaseManagement:
    """Test-case management tests run against a patched ``EvaluationService``."""

    @pytest.fixture
    def mock_evaluation_service(self):
        """Patch ``EvaluationService`` for the test's duration and yield the mock."""
        with patch('api.db.services.evaluation_service.EvaluationService') as patched_service:
            yield patched_service

    @pytest.fixture
    def sample_test_case(self):
        """Keyword arguments describing a complete test case."""
        payload = {
            "dataset_id": "dataset_123",
            "question": "How do I reset my password?",
            "reference_answer": "Click on 'Forgot Password' and follow the email instructions.",
            "relevant_doc_ids": ["doc_789"],
            "relevant_chunk_ids": ["chunk_101", "chunk_102"],
        }
        return payload

    def test_add_test_case_success(self, mock_evaluation_service, sample_test_case):
        """``add_test_case`` returns the configured success pair."""
        mock_evaluation_service.add_test_case.return_value = (True, "case_123")

        added, new_case_id = mock_evaluation_service.add_test_case(**sample_test_case)

        assert added is True
        assert new_case_id == "case_123"

    def test_add_test_case_with_empty_question(self, mock_evaluation_service):
        """An empty question yields the configured failure message."""
        mock_evaluation_service.add_test_case.return_value = (False, "Question cannot be empty")

        added, message = mock_evaluation_service.add_test_case(dataset_id="dataset_123", question="")

        assert added is False
        lowered = message.lower()
        assert "question" in lowered or "empty" in lowered

    def test_add_test_case_without_reference_answer(self, mock_evaluation_service):
        """Passing ``reference_answer=None`` still yields the configured success flag."""
        mock_evaluation_service.add_test_case.return_value = (True, "case_123")

        added, _case_id = mock_evaluation_service.add_test_case(
            dataset_id="dataset_123",
            question="Test question",
            reference_answer=None,
        )

        assert added is True

    def test_get_test_cases(self, mock_evaluation_service):
        """``get_test_cases`` returns the configured two-item list."""
        mock_evaluation_service.get_test_cases.return_value = [
            {"id": "case_1", "question": "Question 1"},
            {"id": "case_2", "question": "Question 2"},
        ]

        fetched = mock_evaluation_service.get_test_cases("dataset_123")

        assert len(fetched) == 2
        assert fetched[0]["id"] == "case_1"

    def test_get_test_cases_empty_dataset(self, mock_evaluation_service):
        """``get_test_cases`` returns an empty list when configured to."""
        mock_evaluation_service.get_test_cases.return_value = []

        fetched = mock_evaluation_service.get_test_cases("dataset_123")

        assert len(fetched) == 0

    def test_delete_test_case_success(self, mock_evaluation_service):
        """``delete_test_case`` returns True when configured to."""
        mock_evaluation_service.delete_test_case.return_value = True

        deleted = mock_evaluation_service.delete_test_case("case_123")

        assert deleted is True

    def test_import_test_cases_success(self, mock_evaluation_service):
        """A bulk import configured as (3, 0) reports three successes and no failures."""
        payload = [
            {"question": "Question 1", "reference_answer": "Answer 1"},
            {"question": "Question 2", "reference_answer": "Answer 2"},
            {"question": "Question 3", "reference_answer": "Answer 3"},
        ]
        mock_evaluation_service.import_test_cases.return_value = (3, 0)

        imported, rejected = mock_evaluation_service.import_test_cases("dataset_123", payload)

        assert imported == 3
        assert rejected == 0

    def test_import_test_cases_with_failures(self, mock_evaluation_service):
        """A bulk import configured as (2, 1) reports two successes and one failure."""
        payload = [
            {"question": "Question 1"},
            {"question": ""},  # invalid: empty question
            {"question": "Question 3"},
        ]
        mock_evaluation_service.import_test_cases.return_value = (2, 1)

        imported, rejected = mock_evaluation_service.import_test_cases("dataset_123", payload)

        assert imported == 2
        assert rejected == 1
class TestEvaluationExecution:
    """Evaluation-run tests run against a patched ``EvaluationService``."""

    @pytest.fixture
    def mock_evaluation_service(self):
        """Patch ``EvaluationService`` for the test's duration and yield the mock."""
        with patch('api.db.services.evaluation_service.EvaluationService') as patched_service:
            yield patched_service

    def test_start_evaluation_success(self, mock_evaluation_service):
        """``start_evaluation`` returns the configured success pair."""
        mock_evaluation_service.start_evaluation.return_value = (True, "run_123")

        started, new_run_id = mock_evaluation_service.start_evaluation(
            dataset_id="dataset_123",
            dialog_id="dialog_456",
            user_id="user_1",
        )

        assert started is True
        assert new_run_id == "run_123"

    def test_start_evaluation_with_invalid_dialog(self, mock_evaluation_service):
        """An unknown dialog yields the configured failure message."""
        mock_evaluation_service.start_evaluation.return_value = (False, "Dialog not found")

        started, message = mock_evaluation_service.start_evaluation(
            dataset_id="dataset_123",
            dialog_id="nonexistent",
            user_id="user_1",
        )

        assert started is False
        assert "dialog" in message.lower()

    def test_start_evaluation_with_custom_name(self, mock_evaluation_service):
        """Passing a custom ``name`` still yields the configured success flag."""
        mock_evaluation_service.start_evaluation.return_value = (True, "run_123")

        started, _run_id = mock_evaluation_service.start_evaluation(
            dataset_id="dataset_123",
            dialog_id="dialog_456",
            user_id="user_1",
            name="My Custom Evaluation",
        )

        assert started is True

    def test_get_run_results(self, mock_evaluation_service):
        """``get_run_results`` returns the configured run summary and per-case rows."""
        run_summary = {
            "id": "run_123",
            "status": "COMPLETED",
            "metrics_summary": {
                "avg_precision": 0.85,
                "avg_recall": 0.78,
            },
        }
        case_rows = [
            {"case_id": "case_1", "metrics": {"precision": 0.9}},
            {"case_id": "case_2", "metrics": {"precision": 0.8}},
        ]
        mock_evaluation_service.get_run_results.return_value = {
            "run": run_summary,
            "results": case_rows,
        }

        fetched = mock_evaluation_service.get_run_results("run_123")

        assert fetched["run"]["id"] == "run_123"
        assert len(fetched["results"]) == 2

    def test_get_run_results_not_found(self, mock_evaluation_service):
        """``get_run_results`` returns an empty dict when configured to."""
        mock_evaluation_service.get_run_results.return_value = {}

        fetched = mock_evaluation_service.get_run_results("nonexistent")

        assert fetched == {}
class TestEvaluationMetrics:
    """Metric-computation tests driven through a patched ``EvaluationService``.

    The metric helpers are stubbed on the mock, so each test checks the
    values the stub was configured to return.
    """

    @pytest.fixture
    def mock_evaluation_service(self):
        """Yield the mock that replaces ``EvaluationService`` while a test runs."""
        service_patch = patch('api.db.services.evaluation_service.EvaluationService')
        with service_patch as mock:
            yield mock

    def test_compute_retrieval_metrics_perfect_match(self, mock_evaluation_service):
        """Retrieved and relevant ids are identical; stubbed scores are all 1.0."""
        compute = mock_evaluation_service._compute_retrieval_metrics
        compute.return_value = {
            "precision": 1.0,
            "recall": 1.0,
            "f1_score": 1.0,
            "hit_rate": 1.0,
            "mrr": 1.0,
        }
        retrieved = ["chunk_1", "chunk_2", "chunk_3"]
        relevant = ["chunk_1", "chunk_2", "chunk_3"]

        scores = compute(retrieved, relevant)

        for key in ("precision", "recall", "f1_score"):
            assert scores[key] == 1.0

    def test_compute_retrieval_metrics_partial_match(self, mock_evaluation_service):
        """Two of four retrieved ids are relevant; precision and recall fall below 1."""
        compute = mock_evaluation_service._compute_retrieval_metrics
        compute.return_value = {
            "precision": 0.5,   # 2 of the 4 retrieved chunks are relevant
            "recall": 0.67,     # 2 of the 3 relevant chunks were retrieved
            "f1_score": 0.57,
            "hit_rate": 1.0,    # at least one relevant chunk came back
            "mrr": 1.0,         # the first retrieved chunk is relevant
        }
        retrieved = ["chunk_1", "chunk_2", "chunk_4", "chunk_5"]
        relevant = ["chunk_1", "chunk_2", "chunk_3"]

        scores = compute(retrieved, relevant)

        for key in ("precision", "recall"):
            assert scores[key] < 1.0
        assert scores["hit_rate"] == 1.0

    def test_compute_retrieval_metrics_no_match(self, mock_evaluation_service):
        """Retrieved and relevant ids are disjoint; stubbed scores are all 0.0."""
        compute = mock_evaluation_service._compute_retrieval_metrics
        compute.return_value = {
            "precision": 0.0,
            "recall": 0.0,
            "f1_score": 0.0,
            "hit_rate": 0.0,
            "mrr": 0.0,
        }
        retrieved = ["chunk_4", "chunk_5", "chunk_6"]
        relevant = ["chunk_1", "chunk_2", "chunk_3"]

        scores = compute(retrieved, relevant)

        for key in ("precision", "recall", "hit_rate"):
            assert scores[key] == 0.0

    def test_compute_summary_metrics(self, mock_evaluation_service):
        """Three per-case results are summarised into totals and averages."""
        summarize = mock_evaluation_service._compute_summary_metrics
        summarize.return_value = {
            "total_cases": 3,
            "avg_execution_time": 1.33,
            "avg_precision": 0.85,
            "avg_recall": 0.75,
        }
        case_results = [
            {"metrics": {"precision": 0.9, "recall": 0.8}, "execution_time": 1.2},
            {"metrics": {"precision": 0.8, "recall": 0.7}, "execution_time": 1.5},
            {"metrics": {"precision": 0.85, "recall": 0.75}, "execution_time": 1.3},
        ]

        summary = summarize(case_results)

        assert summary["total_cases"] == 3
        assert summary["avg_precision"] > 0.8
class TestEvaluationRecommendations:
    """Configuration-recommendation tests run against a patched ``EvaluationService``.

    ``get_recommendations`` is stubbed on the mock; each test checks the
    list the stub was configured to return.
    """

    @pytest.fixture
    def mock_evaluation_service(self):
        """Yield the mock that replaces ``EvaluationService`` while a test runs."""
        service_patch = patch('api.db.services.evaluation_service.EvaluationService')
        with service_patch as mock:
            yield mock

    def test_get_recommendations_low_precision(self, mock_evaluation_service):
        """A low-precision recommendation is returned and mentions precision."""
        get_recs = mock_evaluation_service.get_recommendations
        get_recs.return_value = [
            {
                "issue": "Low Precision",
                "severity": "high",
                "suggestions": [
                    "Increase similarity_threshold",
                    "Enable reranking",
                ],
            },
        ]

        returned = get_recs("run_123")

        assert len(returned) >= 1
        issues = [rec["issue"].lower() for rec in returned]
        assert any("precision" in issue for issue in issues)

    def test_get_recommendations_low_recall(self, mock_evaluation_service):
        """A low-recall recommendation is returned and mentions recall."""
        get_recs = mock_evaluation_service.get_recommendations
        get_recs.return_value = [
            {
                "issue": "Low Recall",
                "severity": "high",
                "suggestions": [
                    "Increase top_k",
                    "Lower similarity_threshold",
                ],
            },
        ]

        returned = get_recs("run_123")

        assert len(returned) >= 1
        issues = [rec["issue"].lower() for rec in returned]
        assert any("recall" in issue for issue in issues)

    def test_get_recommendations_slow_response(self, mock_evaluation_service):
        """A slow-response recommendation mentions either 'response' or 'slow'."""
        get_recs = mock_evaluation_service.get_recommendations
        get_recs.return_value = [
            {
                "issue": "Slow Response Time",
                "severity": "medium",
                "suggestions": [
                    "Reduce top_k",
                    "Optimize embedding model",
                ],
            },
        ]

        returned = get_recs("run_123")

        assert len(returned) >= 1
        issues = [rec["issue"].lower() for rec in returned]
        assert any("response" in issue or "slow" in issue for issue in issues)

    def test_get_recommendations_no_issues(self, mock_evaluation_service):
        """An empty recommendation list is returned when nothing is flagged."""
        get_recs = mock_evaluation_service.get_recommendations
        get_recs.return_value = []

        returned = get_recs("run_123")

        assert returned == []
if __name__ == "__main__":
    # pytest.main() returns the session's exit code rather than exiting.
    # Raise SystemExit with it so that running this file directly reports
    # test failures to the shell / CI instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))

163
uv.lock generated
View file

@ -445,25 +445,25 @@ wheels = [
[[package]]
name = "bce-python-sdk"
version = "0.9.54"
version = "0.9.55"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "future" },
{ name = "pycryptodome" },
{ name = "six" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/c8/1c3bc30aa745ad4c3d073f150bddaf1d43ee6ee33f0b8ec60068494f511e/bce_python_sdk-0.9.54.tar.gz", hash = "sha256:f68026f40f11ea38ef445f50a7756009d5b703c7253438b138b30fb3b83be275", size = 275698, upload-time = "2025-11-27T02:28:50.24Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ae/f31ee3ccae94e1a07d8886a413f08c1581349e6cb45bf8b3c608fbf173e4/bce_python_sdk-0.9.55.tar.gz", hash = "sha256:bed63f8a0975f2e9daecf53417c3d5b803232ad87f35a0b16e25850710ce209c", size = 275733, upload-time = "2025-12-02T12:02:38.041Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/a7/b8806c8505bb830cc863837ef8b42695170dd9561605c61262250df066d3/bce_python_sdk-0.9.54-py3-none-any.whl", hash = "sha256:a084eee577931f15a55280a7401bea2474115989ee79ebbca131610bdce81c99", size = 390447, upload-time = "2025-11-27T02:28:48.603Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/01/1b13a627e5f0239f24b168138d9a948e876d4b387c03f59d31699578c960/bce_python_sdk-0.9.55-py3-none-any.whl", hash = "sha256:6045d19d783b548644cce50a2f41ef5242da6654fb91b2c21629f309ca6dbf4c", size = 390463, upload-time = "2025-12-02T12:02:36.417Z" },
]
[[package]]
name = "beartype"
version = "0.22.7"
version = "0.22.8"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/49/e28a77f8a3868b1c9ff6a030678e84de24c4783bae4c12cec9443cf8fb54/beartype-0.22.7.tar.gz", hash = "sha256:c7269855b71e32b7c9f0fc662baade752eb525107266e053338c2f6e8873826b", size = 1599627, upload-time = "2025-11-29T06:49:56.751Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/1d/794ae2acaa67c8b216d91d5919da2606c2bb14086849ffde7f5555f3a3a5/beartype-0.22.8.tar.gz", hash = "sha256:b19b21c9359722ee3f7cc433f063b3e13997b27ae8226551ea5062e621f61165", size = 1602262, upload-time = "2025-12-03T05:11:10.766Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/0c/a764253610513295b7f57904b91fae1d99c7afd1b16b6eaae06fdfb71fb5/beartype-0.22.7-py3-none-any.whl", hash = "sha256:e13430ac07c61fa4bc54d375970438aeb9aa47a482c529a6f438ce52e18e6f50", size = 1330771, upload-time = "2025-11-29T06:49:54.545Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/2a/fbcbf5a025d3e71ddafad7efd43e34ec4362f4d523c3c471b457148fb211/beartype-0.22.8-py3-none-any.whl", hash = "sha256:b832882d04e41a4097bab9f63e6992bc6de58c414ee84cba9b45b67314f5ab2e", size = 1331895, upload-time = "2025-12-03T05:11:08.373Z" },
]
[[package]]
@ -1910,11 +1910,11 @@ wheels = [
[[package]]
name = "fsspec"
version = "2025.10.0"
version = "2025.12.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/27/954057b0d1f53f086f681755207dda6de6c660ce133c829158e8e8fe7895/fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973", size = 309748, upload-time = "2025-12-03T15:23:42.687Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" },
]
[[package]]
@ -2022,16 +2022,21 @@ wheels = [
[[package]]
name = "google-auth"
version = "2.41.1"
version = "2.43.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "cachetools" },
{ name = "pyasn1-modules" },
{ name = "rsa" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/af/5129ce5b2f9688d2fa49b463e544972a7c82b0fdb50980dafee92e121d9f/google_auth-2.41.1.tar.gz", hash = "sha256:b76b7b1f9e61f0cb7e88870d14f6a94aeef248959ef6992670efee37709cbfd2", size = 292284, upload-time = "2025-09-30T22:51:26.363Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ef/66d14cf0e01b08d2d51ffc3c20410c4e134a1548fc246a6081eae585a4fe/google_auth-2.43.0.tar.gz", hash = "sha256:88228eee5fc21b62a1b5fe773ca15e67778cb07dc8363adcb4a8827b52d81483", size = 296359, upload-time = "2025-11-06T00:13:36.587Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl", hash = "sha256:754843be95575b9a19c604a848a41be03f7f2afd8c019f716dc1f51ee41c639d", size = 221302, upload-time = "2025-09-30T22:51:24.212Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/d1/385110a9ae86d91cc14c5282c61fe9f4dc41c0b9f7d423c6ad77038c4448/google_auth-2.43.0-py2.py3-none-any.whl", hash = "sha256:af628ba6fa493f75c7e9dbe9373d148ca9f4399b5ea29976519e0a3848eddd16", size = 223114, upload-time = "2025-11-06T00:13:35.209Z" },
]
[package.optional-dependencies]
requests = [
{ name = "requests" },
]
[[package]]
@ -2049,15 +2054,15 @@ wheels = [
[[package]]
name = "google-auth-oauthlib"
version = "1.2.3"
version = "1.2.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "google-auth" },
{ name = "requests-oauthlib" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/a6/c6336a6ceb682709a4aa39e2e6b5754a458075ca92359512b6cbfcb25ae3/google_auth_oauthlib-1.2.3.tar.gz", hash = "sha256:eb09e450d3cc789ecbc2b3529cb94a713673fd5f7a22c718ad91cf75aedc2ea4", size = 21265, upload-time = "2025-10-30T21:28:19.105Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/07/a54c100da461ffc5968457823fcc665a48fb4b875c68bcfecbfe24a10dbe/google_auth_oauthlib-1.2.3-py3-none-any.whl", hash = "sha256:7c0940e037677f25e71999607493640d071212e7f3c15aa0febea4c47a5a0680", size = 19184, upload-time = "2025-10-30T21:28:17.88Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" },
]
[[package]]
@ -2177,11 +2182,11 @@ wheels = [
[[package]]
name = "google-genai"
version = "1.52.0"
version = "1.53.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "anyio" },
{ name = "google-auth" },
{ name = "google-auth", extra = ["requests"] },
{ name = "httpx" },
{ name = "pydantic" },
{ name = "requests" },
@ -2189,9 +2194,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "websockets" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/4e/0ad8585d05312074bb69711b2d81cfed69ce0ae441913d57bf169bed20a7/google_genai-1.52.0.tar.gz", hash = "sha256:a74e8a4b3025f23aa98d6a0f84783119012ca6c336fd68f73c5d2b11465d7fc5", size = 258743, upload-time = "2025-11-21T02:18:55.742Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/b3/36fbfde2e21e6d3bc67780b61da33632f495ab1be08076cf0a16af74098f/google_genai-1.53.0.tar.gz", hash = "sha256:938a26d22f3fd32c6eeeb4276ef204ef82884e63af9842ce3eac05ceb39cbd8d", size = 260102, upload-time = "2025-12-03T17:21:23.233Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/f2/97fefdd1ad1f3428321bac819ae7a83ccc59f6439616054736b7819fa56c/google_genai-1.53.0-py3-none-any.whl", hash = "sha256:65a3f99e5c03c372d872cda7419f5940e723374bb12a2f3ffd5e3e56e8eb2094", size = 262015, upload-time = "2025-12-03T17:21:21.934Z" },
]
[[package]]
@ -2776,7 +2781,7 @@ wheels = [
[[package]]
name = "infinity-sdk"
version = "0.6.8"
version = "0.6.10"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "datrie" },
@ -2795,9 +2800,9 @@ dependencies = [
{ name = "sqlglot", extra = ["rs"] },
{ name = "thrift" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/2d/4b699d62202319e5cbbcb4a7d9e87a86dde7ba7c767d0af4ebbee3de8419/infinity_sdk-0.6.8.tar.gz", hash = "sha256:e91c1f6cdf2fa41bc615c72be2a9e981211bd05b34522c1d27f1b825b905b125", size = 72669, upload-time = "2025-12-02T05:09:29.377Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/e5/88fdcfe42835c5494a08f02b64762a98e04dae4ad49f7dfabac18ee01928/infinity_sdk-0.6.10.tar.gz", hash = "sha256:b55c296ca3b2c8c2f4568f359dd8a50772e9432f09b64667140e9804bf780436", size = 29502969, upload-time = "2025-12-04T02:42:17.882Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/08/59ed1261ee80d3b2c5a80313a013a94cae83ce90ff1da1ef488055944a7b/infinity_sdk-0.6.8-py3-none-any.whl", hash = "sha256:392f942a2073a5b545261dad9859b217c6a0331ede606c8894e7ae335f2ead5e", size = 81564, upload-time = "2025-12-02T05:09:27.784Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/99/8857ea0805bd83fe092f5dca914a31f9fcc731c3800264657bd3ba950a1d/infinity_sdk-0.6.10-py3-none-any.whl", hash = "sha256:8f605039ec73d1b05d219105fbabef186e0178fddbad058c2c06c4873be48651", size = 29722107, upload-time = "2025-12-04T02:42:04.101Z" },
]
[[package]]
@ -3077,7 +3082,7 @@ wheels = [
[[package]]
name = "langfuse"
version = "3.10.3"
version = "3.10.5"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "backoff" },
@ -3091,9 +3096,9 @@ dependencies = [
{ name = "requests" },
{ name = "wrapt" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/03/c4316cb0a91cff97118c21b973b3089c2fe1bdbcad02f3623d6ac572e954/langfuse-3.10.3.tar.gz", hash = "sha256:69d6eaf573212f8cdc1cebd2d6b47f271bfe76c7eb5a3c5d6766bb0d9bf0004c", size = 226617, upload-time = "2025-12-01T18:01:02.607Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/21/dff0434290512484436bfa108e36f0adc3457eb4117767de70e76a411cac/langfuse-3.10.5.tar.gz", hash = "sha256:14eb767663f7e7480cd1cd1b3ca457022817c129e666efe97e5c80adb8c5aac0", size = 223142, upload-time = "2025-12-03T17:49:39.747Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/04/f07c2a23f2822f73f8576b1ba7348c014c4be65127384b4bee475f913f3b/langfuse-3.10.3-py3-none-any.whl", hash = "sha256:b9a2e6506f8f0923c2f4b8c9e3fa355231994197a17f75509a37f335660ce334", size = 399062, upload-time = "2025-12-01T18:01:00.688Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/6f/dc15775f82d38da62cd2015110f5802bb175a9ee731a4533fe2a0cdf75b6/langfuse-3.10.5-py3-none-any.whl", hash = "sha256:0223a64109a4293b9bd9b2e0e3229f53b75291cd96341e42cc3eba186973fcdb", size = 398888, upload-time = "2025-12-03T17:49:38.171Z" },
]
[[package]]
@ -4043,32 +4048,32 @@ wheels = [
[[package]]
name = "opentelemetry-api"
version = "1.38.0"
version = "1.39.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "importlib-metadata" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/0b/e5428c009d4d9af0515b0a8371a8aaae695371af291f45e702f7969dce6b/opentelemetry_api-1.39.0.tar.gz", hash = "sha256:6130644268c5ac6bdffaf660ce878f10906b3e789f7e2daa5e169b047a2933b9", size = 65763, upload-time = "2025-12-03T13:19:56.378Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/85/d831a9bc0a9e0e1a304ff3d12c1489a5fbc9bf6690a15dcbdae372bbca45/opentelemetry_api-1.39.0-py3-none-any.whl", hash = "sha256:3c3b3ca5c5687b1b5b37e5c5027ff68eacea8675241b29f13110a8ffbb8f0459", size = 66357, upload-time = "2025-12-03T13:19:33.043Z" },
]
[[package]]
name = "opentelemetry-exporter-otlp-proto-common"
version = "1.38.0"
version = "1.39.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "opentelemetry-proto" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/cb/3a29ce606b10c76d413d6edd42d25a654af03e73e50696611e757d2602f3/opentelemetry_exporter_otlp_proto_common-1.39.0.tar.gz", hash = "sha256:a135fceed1a6d767f75be65bd2845da344dd8b9258eeed6bc48509d02b184409", size = 20407, upload-time = "2025-12-03T13:19:59.003Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/c6/215edba62d13a3948c718b289539f70e40965bc37fc82ecd55bb0b749c1a/opentelemetry_exporter_otlp_proto_common-1.39.0-py3-none-any.whl", hash = "sha256:3d77be7c4bdf90f1a76666c934368b8abed730b5c6f0547a2ec57feb115849ac", size = 18367, upload-time = "2025-12-03T13:19:36.906Z" },
]
[[package]]
name = "opentelemetry-exporter-otlp-proto-http"
version = "1.38.0"
version = "1.39.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "googleapis-common-protos" },
@ -4079,48 +4084,48 @@ dependencies = [
{ name = "requests" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/dc/1e9bf3f6a28e29eba516bc0266e052996d02bc7e92675f3cd38169607609/opentelemetry_exporter_otlp_proto_http-1.39.0.tar.gz", hash = "sha256:28d78fc0eb82d5a71ae552263d5012fa3ebad18dfd189bf8d8095ba0e65ee1ed", size = 17287, upload-time = "2025-12-03T13:20:01.134Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/46/e4a102e17205bb05a50dbf24ef0e92b66b648cd67db9a68865af06a242fd/opentelemetry_exporter_otlp_proto_http-1.39.0-py3-none-any.whl", hash = "sha256:5789cb1375a8b82653328c0ce13a054d285f774099faf9d068032a49de4c7862", size = 19639, upload-time = "2025-12-03T13:19:39.536Z" },
]
[[package]]
name = "opentelemetry-proto"
version = "1.38.0"
version = "1.39.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "protobuf" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/b5/64d2f8c3393cd13ea2092106118f7b98461ba09333d40179a31444c6f176/opentelemetry_proto-1.39.0.tar.gz", hash = "sha256:c1fa48678ad1a1624258698e59be73f990b7fc1f39e73e16a9d08eef65dd838c", size = 46153, upload-time = "2025-12-03T13:20:08.729Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/4d/d500e1862beed68318705732d1976c390f4a72ca8009c4983ff627acff20/opentelemetry_proto-1.39.0-py3-none-any.whl", hash = "sha256:1e086552ac79acb501485ff0ce75533f70f3382d43d0a30728eeee594f7bf818", size = 72534, upload-time = "2025-12-03T13:19:50.251Z" },
]
[[package]]
name = "opentelemetry-sdk"
version = "1.38.0"
version = "1.39.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "opentelemetry-api" },
{ name = "opentelemetry-semantic-conventions" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/e3/7cd989003e7cde72e0becfe830abff0df55c69d237ee7961a541e0167833/opentelemetry_sdk-1.39.0.tar.gz", hash = "sha256:c22204f12a0529e07aa4d985f1bca9d6b0e7b29fe7f03e923548ae52e0e15dde", size = 171322, upload-time = "2025-12-03T13:20:09.651Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/b4/2adc8bc83eb1055ecb592708efb6f0c520cc2eb68970b02b0f6ecda149cf/opentelemetry_sdk-1.39.0-py3-none-any.whl", hash = "sha256:90cfb07600dfc0d2de26120cebc0c8f27e69bf77cd80ef96645232372709a514", size = 132413, upload-time = "2025-12-03T13:19:51.364Z" },
]
[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.59b0"
version = "0.60b0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "opentelemetry-api" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/0e/176a7844fe4e3cb5de604212094dffaed4e18b32f1c56b5258bcbcba85c2/opentelemetry_semantic_conventions-0.60b0.tar.gz", hash = "sha256:227d7aa73cbb8a2e418029d6b6465553aa01cf7e78ec9d0bc3255c7b3ac5bf8f", size = 137935, upload-time = "2025-12-03T13:20:12.395Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/56/af0306666f91bae47db14d620775604688361f0f76a872e0005277311131/opentelemetry_semantic_conventions-0.60b0-py3-none-any.whl", hash = "sha256:069530852691136018087b52688857d97bba61cd641d0f8628d2d92788c4f78a", size = 219981, upload-time = "2025-12-03T13:19:53.585Z" },
]
[[package]]
@ -5683,7 +5688,7 @@ requires-dist = [
{ name = "huggingface-hub", specifier = ">=0.25.0,<0.26.0" },
{ name = "imageio-ffmpeg", specifier = ">=0.6.0" },
{ name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" },
{ name = "infinity-sdk", specifier = "==0.6.8" },
{ name = "infinity-sdk", specifier = "==0.6.10" },
{ name = "itsdangerous", specifier = "==2.1.2" },
{ name = "jira", specifier = "==3.10.5" },
{ name = "json-repair", specifier = "==0.35.0" },
@ -6712,11 +6717,11 @@ wheels = [
[[package]]
name = "sqlglot"
version = "28.0.0"
version = "28.1.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/8d/9ce5904aca760b81adf821c77a1dcf07c98f9caaa7e3b5c991c541ff89d2/sqlglot-28.0.0.tar.gz", hash = "sha256:cc9a651ef4182e61dac58aa955e5fb21845a5865c6a4d7d7b5a7857450285ad4", size = 5520798, upload-time = "2025-11-17T10:34:57.016Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/49/cda1fc4e610ed5764de2842bb2f362f4aba267b4a7d05a3a217a25b39004/sqlglot-28.1.0.tar.gz", hash = "sha256:a3ef7344359667b51cf95e840aac70a49f847602c61c9fbaeb847f74f7877fe1", size = 5546281, upload-time = "2025-12-02T16:52:28.387Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/6d/86de134f40199105d2fee1b066741aa870b3ce75ee74018d9c8508bbb182/sqlglot-28.0.0-py3-none-any.whl", hash = "sha256:ac1778e7fa4812f4f7e5881b260632fc167b00ca4c1226868891fb15467122e4", size = 536127, upload-time = "2025-11-17T10:34:55.192Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/e8/bd016214348f65ba31107c1b81af70fc7662d96758052d5d59b516fd3858/sqlglot-28.1.0-py3-none-any.whl", hash = "sha256:2a895a31666ba947c686caa980624c82bcd0e6fdf59b4fdb9e47108bd092d1ac", size = 547889, upload-time = "2025-12-02T16:52:26.019Z" },
]
[package.optional-dependencies]
@ -6726,40 +6731,40 @@ rs = [
[[package]]
name = "sqlglotrs"
version = "0.7.3"
version = "0.8.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/5a/46d8efeda45be6ce1c630229455f000cafedea6129b47e6cfab39ff462f5/sqlglotrs-0.7.3.tar.gz", hash = "sha256:caadc572c8a194f99d6ba44d02f9ada0110e3d47cca3330c81f4aa608f1143eb", size = 15888, upload-time = "2025-10-13T06:33:57.322Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/37/118f24c367fde662e6c1181327dc9c16d08914108904c69bac3a6ba12c52/sqlglotrs-0.8.0.tar.gz", hash = "sha256:2b9a23c580d82be2388ee23496230cfc667f280ed0ed7eaa099d0da8d718cbf2", size = 15706, upload-time = "2025-12-02T16:58:38.197Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/95/f08e01f54e521a286fcd9f7a8bdd178eabcddd9dbc6d6c15dc983c7be8dd/sqlglotrs-0.7.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7acc6dba37af53d9cf1e3217fdd719878dbfaaf2a578ad7b3fbc07ef9dadd035", size = 314621, upload-time = "2025-10-13T06:33:48.917Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/7d/01a5db15e413ab587816448f1222286d3a10f0465954d21f5d2915aaeed5/sqlglotrs-0.7.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cbfb42071422afbd7376d70b93a969e86fb74752efe98dd66ee6d2ae27a9665", size = 300189, upload-time = "2025-10-13T06:33:40.963Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/21/94d1fb647a394afcb09a9174f7bff078452bb956e6898093dd9ee459ef2b/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07500421de9dea8dfc0cd6769145df754178fc2ae5a3692bdbf5d37aebc0712a", size = 332771, upload-time = "2025-10-13T06:32:45.992Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/d1/ccade8e794304c925e9b94e1d7bff4c56896f571a291a03bfd96048c4a0f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:792eb179a742d7d72d1d47c9a50e073078f0133e9191bd07920945dcc9170844", size = 342960, upload-time = "2025-10-13T06:32:55.493Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/2f/2ff3cfe7d91ac3762100e511c4eff0c98824970d7c27e18e88c44a4d4567/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4c3849992e33e47403c2517d464564e4b4cf6a080ad761141504e271ab2c7cd", size = 487268, upload-time = "2025-10-13T06:33:13.784Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/d7/a95fbdd26f20b7bd5781bb5a4c51616fdd59f1c521010f668ffd54e59f5d/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:016f51409ed3d87c33ca5a31dd6766e75a809909e418a0ffd2965e0ae7b84a7b", size = 365853, upload-time = "2025-10-13T06:33:23.415Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/7a/5d50d0b1167c79a509957d58a6bf9f6450f894e0bc233987cb85ccaec50f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94dd711ea2ba76e664dab3e7f7b08cb5517cf5164fd94a552598acfd1f6df59a", size = 343697, upload-time = "2025-10-13T06:33:32.542Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/89/85acbd412a5c7ef39ee5a96f5be28d6d38bce2c4521a264c747361b4c021/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:517198977f3baece667513326e42545b00b2878719922c58fcbfa21553f1338d", size = 363446, upload-time = "2025-10-13T06:33:03.995Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/4d/0a04f29731b6fda327bd11495c143ce70d1a7446b22440a32d8571408a06/sqlglotrs-0.7.3-cp310-cp310-win32.whl", hash = "sha256:1e9121ef3a64dc7d18e500e5e93df458a9bb6f4111b8f8569d5e4f8db21e61d2", size = 183997, upload-time = "2025-10-13T06:33:58.579Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/16/0e95fa77409da059c951c6be11d4d73311c60bb5ed82f1d40a4afc9a1aa9/sqlglotrs-0.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:48fd7e9efef56331e1ef7402b6d65113c087da1cfe2ef80d143ee62046d49056", size = 195923, upload-time = "2025-10-13T06:34:06.676Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/41/fcd87de298b562947cb2592feb9df5794886a8fa24eab8a080a552aa0e4d/sqlglotrs-0.7.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f2144fc8e472de2b165665c1370e7f0ca7f9400f60ca5e78c7aedbb3233bc8d7", size = 314465, upload-time = "2025-10-13T06:33:50.219Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/81/22cf241e22f364c414d57893fad9cfea869f8866189e75575a3862f1d329/sqlglotrs-0.7.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93cb74928b205da3f29f2b9c728d2c6656ad30e1ef500560f6c851bca2129fbc", size = 300129, upload-time = "2025-10-13T06:33:42.205Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/90/4e4220f8605c6fbca77dfad2052cdebf195099c99fd0684723677dcbf091/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a918137bacfa31529802063e349a99d5352f74c914beceb14689cd2864c5e4d0", size = 332735, upload-time = "2025-10-13T06:32:48.095Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/35/abe3cb6aa197b5193fcb446ab69465b5927e09e281b2c05f4e12249fd335/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c3fd0edbd957d136c67919ead10c90d832da1aedbbedc6da899d173fe78bf600", size = 342779, upload-time = "2025-10-13T06:32:56.782Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/71/670ad31f4dbfe594592a1992c4e7a62003dc47dffb15d96b2fec4137f933/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a361a1dd8c55fbc57f81db738658141cab723509cc1b3edcc871bccfbba0cfb", size = 487344, upload-time = "2025-10-13T06:33:15.095Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/73/86e46b762b615c7cdec489e4b0670d2a04ea6fab0c0be30a5756e95f108f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c698af6379475c243a8f190845bf1d1267a2c9867011a4567d5cfdcc5b0eb094", size = 366062, upload-time = "2025-10-13T06:33:25.183Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/07/b4dd7315df7d975c4b82d09106eb73ea2ee8f3734f764889913636e9d68c/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d63ed29058c56f153912c90811d8af1706d81f0c759883baeb21acb6322969", size = 343642, upload-time = "2025-10-13T06:33:33.826Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/84/2e834fc665236ef6b0fced14d75c8e9eb0db471d96fde539d8c37ce3a10f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4e19dee6dc46c4d84c556ae456fa0c6400edb157528fd369670b3d041b54ef21", size = 363731, upload-time = "2025-10-13T06:33:05.913Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/db/b7063b1240a1c39bc5627880dbb80c9e3f7b5548a17962d3a6bf98239171/sqlglotrs-0.7.3-cp311-cp311-win32.whl", hash = "sha256:f1276d0f02eaefbdd149b614f6c21fb9be372d7e1137f19c3d5f9e50662367b3", size = 183607, upload-time = "2025-10-13T06:33:59.858Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/98/e9cb2b3dd4abb34d2ae71747f113bf12f741a86fa29e661f1f09ba8376d0/sqlglotrs-0.7.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccf05fc6e89523cf5819982fab12b8fe07a9656dbb5356fc4b56b562e734c202", size = 196050, upload-time = "2025-10-13T06:34:07.921Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/3f/3b059058e198b2fb6612d0ddaad5431a796d7081d40b21f12273ea1b26dc/sqlglotrs-0.7.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2e7be55bf719b5ebdc344a996b6d27b9a0ba9bae0a09462900805e2f7dc4dca5", size = 310987, upload-time = "2025-10-13T06:33:51.874Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/b6/0058b2fe4f4813d9f3280d65ace97a637e8edd152be2a13bb1782c5c2eff/sqlglotrs-0.7.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6fef415993e1843201a57916f310b49e79669db379ff38094161fa93be2ffdf2", size = 296829, upload-time = "2025-10-13T06:33:43.838Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/a8/35c593b03bf498876aea68ea944a7e7bb9cf648e68984f55795181c928dd/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e980354e576e852c53e0bb5444b04ebb6459054074bce8012cc3385dd3d116ed", size = 332313, upload-time = "2025-10-13T06:32:49.343Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/bc/534e21a233846d33d6b55100485bf1844d301b0b75deded5310ef9cd171f/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1444b260c040cc80697956629f3fd3adece0bdb4f83bae22cd618ca3f18c4de8", size = 342309, upload-time = "2025-10-13T06:32:58.031Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/63/1d7bd7de87f01adb43cd1710d3fd5b9d5b0b3fea160bbeadc340fe1a9132/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3202c6f00145b8adb4632c1bb5071be5aa362829054653bac058dbcdbc6228e7", size = 484954, upload-time = "2025-10-13T06:33:16.697Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/bd/10126c9f59fb4f8fa51bf3f0ad17895b953bd09e1687986d5d9e110758c8/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17ae27e895f0ed960e28e76028c84758ff00df24e598654df3b5f22de8c7fc30", size = 366874, upload-time = "2025-10-13T06:33:26.888Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/fa/f12a1eb9c22cdce962bafebefea58e898c19bae3d21e9b79d6e811a2951d/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a36c3d55b913c09dc31572ca7d5b423e85d761f1b3c9d8f86e2a1433a2f20d5", size = 342990, upload-time = "2025-10-13T06:33:35.478Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/1d/2bd1c8900d7a081a61a1c424785fd1a1452def751bc212630251423d80ce/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:94875611a2a953c06e8152b1d485f4d20ec61b72ebd848f96c04aca66b30f189", size = 362603, upload-time = "2025-10-13T06:33:07.507Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/3a/9c176a7f9b93d78168b3d137db4704a703cb51b32edb29d231b615130b47/sqlglotrs-0.7.3-cp312-cp312-win32.whl", hash = "sha256:64a708c87d1bea1f4bdb3edf0a3e94ea1b908ed229502da93621a4a50ef20189", size = 183180, upload-time = "2025-10-13T06:34:01.017Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/ea/37757060d3caadb22509d349087e1e16a2dcc4c1649a00d2d6b501f8ff50/sqlglotrs-0.7.3-cp312-cp312-win_amd64.whl", hash = "sha256:fe1ef1bedbb34b87dfb4e99a911026f8895ff2514b222cfd82cd08033406de2e", size = 195746, upload-time = "2025-10-13T06:34:09.478Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/88/7fc59c146118603e06abf69dc19c237ef496a8dd936e5c224fdffc7df120/sqlglotrs-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3db8f75b8efe5b94ed5540c13b80ef0a3e64c0d15864b05a6bccf5554c6e6008", size = 318097, upload-time = "2025-12-02T16:58:30.763Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/9a/7c0103f02b371f49f6ade420519d54c11c7e3ae4dcf22a855b9c71ccb546/sqlglotrs-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37d00b69814fdabd4256be955d66e699afa1c50740f03369503d85f90245af35", size = 306820, upload-time = "2025-12-02T16:58:23.714Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/cf/52de2a02a52976dfbd863ec57a3fafaf018a9536114f195404d51717501d/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:631da494550442ec2c7139993f59d854e4d4a44282b568594b5fc50818bc4736", size = 341540, upload-time = "2025-12-02T16:57:33.009Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/89/072a295c3b98322a3d08d85ed47551c1f080309f2cde2d2fa75bd1964621/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b624e0650067cc006d8a0595e07be3ac91599187ee353313eb9f114ca434e44", size = 350048, upload-time = "2025-12-02T16:57:41.477Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/b2/fbc05eef045124a9e5820812ddd641ec42add5e52f12126a85d942b0f166/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c0c5ae335b1917aa101d7cfe1aacbedf3b54f489d2038e94c8f42ffe5bd304a", size = 474032, upload-time = "2025-12-02T16:58:00.344Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/a8/1472a5d5f849803fb2ad566ae43db8e5c9f3b1686b104dda245e4acfd963/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21d145e9fef6e2e53fdf17f9b6ab7e7fbba26064365c56d2103a41e95053d1d4", size = 365233, upload-time = "2025-12-02T16:58:08.102Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/c8/ea700f277cba380c7919136a16e03f9f990f29da34c5404b861fbb8b6fd5/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ed5d7afd8b6b244c33316cc292122f26c20bf9677907bc5790c1b053097aff4", size = 348452, upload-time = "2025-12-02T16:58:15.863Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/f7/ba63c7cabcd71abed855e7a4cecb4b0df297bf17d315ff39eacf94926378/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:185442ad85a125719bf365a238c2b357c079cb5a13392adbbde172b1a0073410", size = 371656, upload-time = "2025-12-02T16:57:51.329Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/dc/1ba05670afe7f4c7e651f972f4738dc4508525bb67b9151cdf463b0ef55b/sqlglotrs-0.8.0-cp310-cp310-win32.whl", hash = "sha256:a7d3f36d9c53090842ae18de6d96bd7634d73584255014983aad998f2b7dc95f", size = 188554, upload-time = "2025-12-02T16:58:39.078Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/fc/a393a837a9e09411da87cf8ee2d9f190e3bad37d289cd385e3791356a788/sqlglotrs-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:c8a5e3c8870323666e9695be7cc65f710ed437ceea572e69e2b14e63b70f21b2", size = 200973, upload-time = "2025-12-02T16:58:46.02Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/be/a6a8e41e59813663baf02b23534d822b62521d018ee740f132b4547c4239/sqlglotrs-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0267b0121073669d1184bc0441779559e6b0c6067a12571b63befa2a9b4b0f77", size = 318016, upload-time = "2025-12-02T16:58:32.555Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/02/bf65a608b2caf268d81073171196f93beed8d32731ebda1288153dec2b73/sqlglotrs-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c1a2fa22a3ae4b38c7df9abbf14b2473f7e71c859c95bc270bd4a169688380", size = 306527, upload-time = "2025-12-02T16:58:24.853Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/98/32de2ad5ea9310e220baabfb6b2ee1e3c7ebb3b83a1db9bd2acdf72de6a5/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7df3d2117c92004aa20082d71fbbd1735f063f123354d32d0b2b602ab4e1353", size = 341821, upload-time = "2025-12-02T16:57:34.854Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/99/64247cb3b9f99ca09aafa11791fe250326d498b194795af91cc957003852/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ecd7fdfd1be44828a8a8046ee743ffbaf93a972d7a125ff13e4673bb659fcf2c", size = 350003, upload-time = "2025-12-02T16:57:42.659Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/91/bc15e4d2322cc28f4f94e519b2ae927ba42844830efaacf973ff774d8e06/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:171df6454f3dc064b89895c51cfb713163188493b36b845bf7c17df0e5702095", size = 474163, upload-time = "2025-12-02T16:58:01.554Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/8e/736451fc39f68f1e394a90d768dd9c8135412669ea3460e47033308cbb2e/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:497472ed07445a693e2699fd6f1b8ed5b8320488ade6a4a8e476664ee93ea51c", size = 365088, upload-time = "2025-12-02T16:58:09.604Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/2c/214f352fe03652b08873dcb8f4e6799a02be71446bdf9fea99ce13a502f3/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2be9add4daed501e28564208b30d4a772dfd6aaa1ad10dadd2d49f4e851f9fa", size = 348368, upload-time = "2025-12-02T16:58:17.363Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/22/c445428a52d053a6f6b31858ac817afb997316e9f0ab2ee3187a10bd85a4/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:871d5ee6414f2d7116b670d0430c16f5b3d5a96480c274f7f3d50d97dbea7601", size = 371720, upload-time = "2025-12-02T16:57:52.71Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/b2/301261db4ac543891f897b58a036e87ff33158ea4eda050ee0e08ae0083a/sqlglotrs-0.8.0-cp311-cp311-win32.whl", hash = "sha256:1bbe94effd9d64a8bdca12e0f14b28388059cb5a381561bac07aafedc8c63761", size = 188284, upload-time = "2025-12-02T16:58:40.21Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/a1/0534075d3b8a7c8ab8eff4ea7ba0338a2ef76e3d2e49105b189049430e99/sqlglotrs-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:05a5098ec2836799c4c43b06df7c68a2b4c19c0fce042a66706fe3edc957459d", size = 201117, upload-time = "2025-12-02T16:58:47.14Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/20/7beddfd545aaebbfee10a77ac8ef8a205ff597f9ce041c4b0437d0194392/sqlglotrs-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fcb53f27cf4b9cae8a66c5777b84eeb3d079e96bcb4277b627fd90bfd1a591b5", size = 314699, upload-time = "2025-12-02T16:58:33.82Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/6f/6223a1946fe24a979b8af3c7ae2d16c5451d8f35f2468782bd4af2c122da/sqlglotrs-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4da1480cc288e02bd459e4638f212fa86a1fef81eb2cd69e6fdbdeb64e3df729", size = 303385, upload-time = "2025-12-02T16:58:26.052Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/98/55050208ef839cad740df6ca86f2f3ca895d469f6ce2040cba32d0b6c4a0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4a77df178b0ba242aba0e7cd775c3f9aef0fa79dfc31c6e642431ce690f51f", size = 341580, upload-time = "2025-12-02T16:57:36.197Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/f2/6f1d207e629fd4810cc826cf419acc386f3d43d32987684730fbc2399503/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a8647d20cc5a9ff39071786169b3f1acf56f266483fa55386111783bca335f04", size = 348451, upload-time = "2025-12-02T16:57:43.756Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/1b/fa8a0907471fe7be3754bac683a21c984b17672eef6958206473f683b63a/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1afdd6a0fa915b3aef7c801cbdc815bb39b3d6aecc4d5b04c4ce54d3f73d0013", size = 475703, upload-time = "2025-12-02T16:58:02.843Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/56/f020c9c48d68883f6e24d69d18fe386eafc5963bc3982cc45013ec9b1ba0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b4c1edeb80f572cf3586b9a23d15f18f48ac8dc481eceabdbb85dc7dbf8a2ce", size = 365842, upload-time = "2025-12-02T16:58:10.847Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/7b/091464f8aa2232a2f33028f9c9a2cbea7c4e5719400656f203592d46264d/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b6d819f2753804d55b10e4320df08350cd2739556572a97ed1b1d7fc939f194", size = 348397, upload-time = "2025-12-02T16:58:18.567Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/1b/1b0cf0d41e8412786d1e80695778db799520223acf85c3ddc53c1200731f/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dcf2cce002969cefb1466f2837c716d20fc9eac62b05043523fda25b3de4c444", size = 369756, upload-time = "2025-12-02T16:57:53.85Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/6e/d9e50472aa92736751abf3d6fcad1c793f0701f17a553ae787e4a7581a1d/sqlglotrs-0.8.0-cp312-cp312-win32.whl", hash = "sha256:5459235a25b30eae508bcaea8bc6ebc04610acd87e985ba4d602981a94078384", size = 187891, upload-time = "2025-12-02T16:58:41.57Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/a2/21d09ff2065a7e883f8f68dcea57fb23f6f04ba7a193f2ac2895b5dfafae/sqlglotrs-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:1e0de4fa8e6c54419bd63a1205f3218feb5e2649d72f1bc69c5261b6c333e63b", size = 200842, upload-time = "2025-12-02T16:58:48.181Z" },
]
[[package]]

View file

@ -10,6 +10,10 @@ interface DocPreviewerProps {
url: string;
}
// Word document preview component. Behavior:
// 1) Fetches the document as a Blob.
// 2) Detects .docx input via a ZIP header probe.
// 3) Renders .docx using Mammoth; presents a controlled "unsupported" notice for non-ZIP payloads.
export const DocPreviewer: React.FC<DocPreviewerProps> = ({
className,
url,
@ -17,6 +21,33 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
const [htmlContent, setHtmlContent] = useState<string>('');
const [loading, setLoading] = useState(false);
// Determines whether the Blob represents a .docx document by checking for the ZIP
// file signature ("PK") in the initial bytes. A valid .docx file is a ZIP container
// and always begins with:
// 50 4B 03 04 ("PK..")
//
// Legacy .doc files use the CFBF binary format, commonly starting with:
// D0 CF 11 E0 A1 B1 1A E1
//
// Note that some files distributed with a “.doc” extension may internally be .docx
// documents (e.g., renamed files or files produced by systems that export .docx
// content under a .doc filename). These files will still present the ZIP signature
// and are therefore treated as supported .docx payloads. The header inspection
// ensures correct routing regardless of filename or reported extension.
const isZipLikeBlob = async (blob: Blob): Promise<boolean> => {
  // Only the first four bytes of the payload are read for the probe.
  let header: Uint8Array;
  try {
    header = new Uint8Array(await blob.slice(0, 4).arrayBuffer());
  } catch (e) {
    // Any failure while reading the header is reported and treated as "not a ZIP".
    console.error('Failed to inspect blob header', e);
    return false;
  }

  // Payloads shorter than the two-byte "PK" prefix cannot match.
  if (header.length < 2) {
    return false;
  }

  // ZIP files start with "PK" (0x50, 0x4B)
  return header[0] === 0x50 && header[1] === 0x4b;
};
const fetchDocument = async () => {
if (!url) return;
@ -36,24 +67,21 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
const contentType: string =
blob.type || (res as any).headers?.['content-type'] || '';
// ---- Detect legacy .doc via MIME or URL ----
const cleanUrl = url.split(/[?#]/)[0].toLowerCase();
const isDocMime = /application\/msword/i.test(contentType);
const isLegacyDocByUrl =
cleanUrl.endsWith('.doc') && !cleanUrl.endsWith('.docx');
const isLegacyDoc = isDocMime || isLegacyDocByUrl;
// Execution path selection: ZIP-like payloads are treated as .docx and rendered via Mammoth;
// non-ZIP payloads receive an explicit unsupported notice.
const looksLikeZip = await isZipLikeBlob(blob);
if (isLegacyDoc) {
// Do not call mammoth and do not throw an error; instead, show a note in the preview area
if (!looksLikeZip) {
// Non-ZIP payload (likely legacy .doc or another format): skip Mammoth processing.
setHtmlContent(`
<div class="flex h-full items-center justify-center">
<div class="border border-dashed border-border-normal rounded-xl p-8 max-w-2xl text-center">
<p class="text-2xl font-bold mb-4">
Preview not available for .doc files
Preview is not available for this Word document
</p>
<p class="italic text-sm text-muted-foreground leading-relaxed">
Mammoth does not support <code>.doc</code> documents.<br/>
Inline preview is unavailable.
Mammoth supports modern <code>.docx</code> files only.<br/>
The file header does not indicate a <code>.docx</code> ZIP archive.
</p>
</div>
</div>
@ -61,7 +89,7 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
return;
}
// ---- Standard .docx preview path ----
// ZIP-like payload: parse as .docx with Mammoth
const arrayBuffer = await blob.arrayBuffer();
const result = await mammoth.convertToHtml(
{ arrayBuffer },
@ -74,8 +102,7 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
setHtmlContent(styledContent);
} catch (err) {
// Only errors from the mammoth conversion path should surface here
message.error('Document parsing failed');
message.error('Failed to parse document.');
console.error('Error parsing document:', err);
} finally {
setLoading(false);

View file

@ -1,8 +1,9 @@
import React, { useEffect, useRef } from 'react';
import React, { useEffect, useRef, useState } from 'react';
import { useTranslation } from 'react-i18next';
import './css/cloud9_night.less';
import './css/index.less';
import { JsonEditorOptions, JsonEditorProps } from './interface';
const defaultConfig: JsonEditorOptions = {
mode: 'code',
modes: ['tree', 'code'],
@ -14,6 +15,7 @@ const defaultConfig: JsonEditorOptions = {
enableTransform: false,
indentation: 2,
};
const JsonEditor: React.FC<JsonEditorProps> = ({
value,
onChange,
@ -25,43 +27,62 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
const editorRef = useRef<any>(null);
const { i18n } = useTranslation();
const currentLanguageRef = useRef<string>(i18n.language);
const [isLoading, setIsLoading] = useState(true);
useEffect(() => {
if (typeof window !== 'undefined') {
const JSONEditor = require('jsoneditor');
import('jsoneditor/dist/jsoneditor.min.css');
let isMounted = true;
if (containerRef.current) {
// Default configuration options
const defaultOptions: JsonEditorOptions = {
...defaultConfig,
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
onChange: () => {
if (editorRef.current && onChange) {
try {
const updatedJson = editorRef.current.get();
onChange(updatedJson);
} catch (err) {
// Do not trigger onChange when parsing error occurs
console.error(err);
}
const initEditor = async () => {
if (typeof window !== 'undefined') {
try {
const JSONEditorModule = await import('jsoneditor');
const JSONEditor = JSONEditorModule.default || JSONEditorModule;
await import('jsoneditor/dist/jsoneditor.min.css');
if (isMounted && containerRef.current) {
// Default configuration options
const defaultOptions: JsonEditorOptions = {
...defaultConfig,
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
onChange: () => {
if (editorRef.current && onChange) {
try {
const updatedJson = editorRef.current.get();
onChange(updatedJson);
} catch (err) {
// Do not trigger onChange when parsing error occurs
console.error(err);
}
}
},
...options, // Merge user provided options with defaults
};
editorRef.current = new JSONEditor(
containerRef.current,
defaultOptions,
);
if (value) {
editorRef.current.set(value);
}
},
...options, // Merge user provided options with defaults
};
editorRef.current = new JSONEditor(
containerRef.current,
defaultOptions,
);
if (value) {
editorRef.current.set(value);
setIsLoading(false);
}
} catch (error) {
console.error('Failed to load jsoneditor:', error);
if (isMounted) {
setIsLoading(false);
}
}
}
}
};
initEditor();
return () => {
isMounted = false;
if (editorRef.current) {
if (typeof editorRef.current.destroy === 'function') {
editorRef.current.destroy();
@ -92,26 +113,38 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
}
// Recreate the editor with new language
const JSONEditor = require('jsoneditor');
const initEditorWithNewLanguage = async () => {
try {
const JSONEditorModule = await import('jsoneditor');
const JSONEditor = JSONEditorModule.default || JSONEditorModule;
const newOptions: JsonEditorOptions = {
...defaultConfig,
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
onChange: () => {
if (editorRef.current && onChange) {
try {
const updatedJson = editorRef.current.get();
onChange(updatedJson);
} catch (err) {
// Do not trigger onChange when parsing error occurs
}
}
},
...options, // Merge user provided options with defaults
const newOptions: JsonEditorOptions = {
...defaultConfig,
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
onChange: () => {
if (editorRef.current && onChange) {
try {
const updatedJson = editorRef.current.get();
onChange(updatedJson);
} catch (err) {
// Do not trigger onChange when parsing error occurs
}
}
},
...options, // Merge user provided options with defaults
};
editorRef.current = new JSONEditor(containerRef.current, newOptions);
editorRef.current.set(currentData);
} catch (error) {
console.error(
'Failed to reload jsoneditor with new language:',
error,
);
}
};
editorRef.current = new JSONEditor(containerRef.current, newOptions);
editorRef.current.set(currentData);
initEditorWithNewLanguage();
}
}, [i18n.language, value, onChange, options]);
@ -135,7 +168,13 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
ref={containerRef}
style={{ height }}
className={`ace-tomorrow-night w-full border border-border-button rounded-lg overflow-hidden bg-bg-input ${className} `}
/>
>
{isLoading && (
<div className="flex items-center justify-center h-full">
<div className="text-text-secondary">Loading editor...</div>
</div>
)}
</div>
);
};

View file

@ -217,20 +217,23 @@ const MarkdownContent = ({
const docType = chunkItem?.doc_type;
return showImage(docType) ? (
<Image
id={imageId}
className={styles.referenceInnerChunkImage}
onClick={
documentId
? handleDocumentButtonClick(
documentId,
chunkItem,
fileExtension === 'pdf',
documentUrl,
)
: () => {}
}
></Image>
<section>
<Image
id={imageId}
className={styles.referenceInnerChunkImage}
onClick={
documentId
? handleDocumentButtonClick(
documentId,
chunkItem,
fileExtension === 'pdf',
documentUrl,
)
: () => {}
}
></Image>
<span className="text-accent-primary"> {imageId}</span>
</section>
) : (
<HoverCard key={i}>
<HoverCardTrigger>

View file

@ -220,20 +220,23 @@ function MarkdownContent({
const docType = chunkItem?.doc_type;
return showImage(docType) ? (
<Image
id={imageId}
className={styles.referenceInnerChunkImage}
onClick={
documentId
? handleDocumentButtonClick(
documentId,
chunkItem,
fileExtension === 'pdf',
documentUrl,
)
: () => {}
}
></Image>
<section>
<Image
id={imageId}
className={styles.referenceInnerChunkImage}
onClick={
documentId
? handleDocumentButtonClick(
documentId,
chunkItem,
fileExtension === 'pdf',
documentUrl,
)
: () => {}
}
></Image>
<span className="text-accent-primary">{imageId}</span>
</section>
) : (
<HoverCard key={i}>
<HoverCardTrigger>

6
web/src/custom.d.ts vendored
View file

@ -2,3 +2,9 @@ declare module '*.md' {
const content: string;
export default content;
}
// Minimal ambient typing for the untyped 'jsoneditor' package.
// Only a default export is declared: TypeScript does not allow `export =`
// to be combined with other exported elements in the same module (TS2309).
declare module 'jsoneditor' {
  // Editor constructor; typed loosely because no upstream typings are used here.
  const JSONEditor: any;
  export default JSONEditor;
}

View file

@ -40,6 +40,7 @@ import { useDropdownManager } from './context';
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import { useNodeLoading } from '../hooks/use-node-loading';
import {
useHideFormSheetOnNodeDeletion,
useShowDrawer,
@ -166,6 +167,8 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
});
const [lastSendLoading, setLastSendLoading] = useState(false);
const [currentSendLoading, setCurrentSendLoading] = useState(false);
const { handleBeforeDelete } = useBeforeDelete();
const { addCanvasNode, addNoteNode } = useAddNode(reactFlowInstance);
@ -182,6 +185,7 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
}, [chatVisible, clearEventList, currentTaskId, stopMessage]);
const setLastSendLoadingFunc = (loading: boolean, messageId: string) => {
setCurrentSendLoading(!!loading);
if (messageId === currentMessageId) {
setLastSendLoading(loading);
} else {
@ -249,7 +253,10 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
clearActiveDropdown,
removePlaceholderNode,
]);
const { lastNode, setDerivedMessages, startButNotFinishedNodeIds } =
useNodeLoading({
currentEventListWithoutMessageById,
});
return (
<div className={cn(styles.canvasWrapper, 'px-5 pb-5')}>
<svg
@ -285,7 +292,15 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
</marker>
</defs>
</svg>
<AgentInstanceContext.Provider value={{ addCanvasNode, showFormDrawer }}>
<AgentInstanceContext.Provider
value={{
addCanvasNode,
showFormDrawer,
lastNode,
currentSendLoading,
startButNotFinishedNodeIds,
}}
>
<ReactFlow
connectionMode={ConnectionMode.Loose}
nodes={nodes}
@ -380,9 +395,10 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
></FormSheet>
</AgentInstanceContext.Provider>
)}
{chatVisible && (
<AgentChatContext.Provider
value={{ showLogSheet, setLastSendLoadingFunc }}
value={{ showLogSheet, setLastSendLoadingFunc, setDerivedMessages }}
>
<AgentChatLogContext.Provider
value={{ addEventList, setCurrentMessageId }}

View file

@ -44,7 +44,7 @@ function InnerAgentNode({
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
{isHeadAgent && (
<>
<LeftEndHandle></LeftEndHandle>

View file

@ -24,7 +24,7 @@ function InnerBeginNode({ data, id, selected }: NodeProps<IBeginNode>) {
const inputs: Record<string, BeginQuery> = get(data, 'form.inputs', {});
return (
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<CommonHandle
type="source"
position={Position.Right}

View file

@ -18,7 +18,7 @@ export function InnerCategorizeNode({
const { positions } = useBuildCategorizeHandlePositions({ data, id });
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<LeftEndHandle></LeftEndHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>

View file

@ -14,7 +14,7 @@ export function ExitLoopNode({ id, data, selected }: NodeProps<BaseNode<any>>) {
showRun={false}
showCopy={false}
>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<LeftEndHandle></LeftEndHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
</NodeWrapper>

View file

@ -23,7 +23,7 @@ function InnerFileNode({ data, id, selected }: NodeProps<IBeginNode>) {
const inputs: Record<string, BeginQuery> = get(data, 'form.inputs', {});
return (
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<CommonHandle
type="source"
position={Position.Right}

View file

@ -26,7 +26,7 @@ function InnerRagNode({
showRun={needsSingleStepDebugging(data.label)}
showCopy={showCopyIcon(data.label)}
>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<LeftEndHandle></LeftEndHandle>
<CommonHandle
type="source"

View file

@ -16,7 +16,7 @@ function InnerMessageNode({ id, data, selected }: NodeProps<IMessageNode>) {
const messages: string[] = get(data, 'form.content', []);
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<LeftEndHandle></LeftEndHandle>
<NodeHeader
id={id}

View file

@ -1,9 +1,13 @@
import { cn } from '@/lib/utils';
import { HTMLAttributes } from 'react';
import { Loader } from 'lucide-react';
import { HTMLAttributes, useContext } from 'react';
import { AgentInstanceContext } from '../../context';
type IProps = HTMLAttributes<HTMLDivElement> & { selected?: boolean };
export function NodeWrapper({ children, className, selected }: IProps) {
export function NodeWrapper({ children, className, selected, id }: IProps) {
const { currentSendLoading, startButNotFinishedNodeIds = [] } =
useContext(AgentInstanceContext);
return (
<section
className={cn(
@ -12,6 +16,13 @@ export function NodeWrapper({ children, className, selected }: IProps) {
className,
)}
>
{id &&
startButNotFinishedNodeIds.indexOf(id as string) > -1 &&
currentSendLoading && (
<div className=" absolute right-0 left-0 top-0 flex items-start justify-end p-2">
<Loader size={12} className=" animate-spin" />
</div>
)}
{children}
</section>
);

View file

@ -19,7 +19,7 @@ function ParserNode({
}: NodeProps<BaseNode<ParserFormSchemaType>>) {
const { t } = useTranslation();
return (
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<CommonHandle
id={NodeHandleId.End}
type="target"

View file

@ -27,7 +27,7 @@ function InnerRetrievalNode({
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<LeftEndHandle></LeftEndHandle>
<CommonHandle
id={NodeHandleId.Start}

View file

@ -25,7 +25,7 @@ function InnerSplitterNode({
showCopy={false}
showRun={false}
>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<CommonHandle
id={NodeHandleId.End}
type="target"

View file

@ -65,7 +65,7 @@ function InnerSwitchNode({ id, data, selected }: NodeProps<ISwitchNode>) {
const { positions } = useBuildSwitchHandlePositions({ data, id });
return (
<ToolBar selected={selected} id={id} label={data.label} showRun={false}>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<LeftEndHandle></LeftEndHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
<section className="gap-2.5 flex flex-col">

View file

@ -27,7 +27,7 @@ function TokenizerNode({
showRun={false}
showCopy={false}
>
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<CommonHandle
id={NodeHandleId.End}
type="target"

View file

@ -44,7 +44,7 @@ function InnerToolNode({
);
return (
<NodeWrapper selected={selected}>
<NodeWrapper selected={selected} id={id}>
<Handle
id={NodeHandleId.End}
type="target"

View file

@ -13,8 +13,9 @@ import {
} from '@/hooks/use-agent-request';
import { useFetchUserInfo } from '@/hooks/use-user-setting-request';
import { buildMessageUuidWithRole } from '@/utils/chat';
import { memo, useCallback } from 'react';
import { memo, useCallback, useContext } from 'react';
import { useParams } from 'umi';
import { AgentChatContext } from '../context';
import DebugContent from '../debug-content';
import { useAwaitCompentData } from '../hooks/use-chat-logic';
import { useIsTaskMode } from '../hooks/use-get-begin-query';
@ -49,6 +50,9 @@ function AgentChatBox() {
canvasId: canvasId as string,
});
const { setDerivedMessages } = useContext(AgentChatContext);
setDerivedMessages?.(derivedMessages);
const isTaskMode = useIsTaskMode();
const handleUploadFile: NonNullable<FileUploadProps['onUpload']> =

View file

@ -1,6 +1,8 @@
import { INodeEvent } from '@/hooks/use-send-message';
import { IMessage } from '@/interfaces/database/chat';
import { RAGFlowNodeType } from '@/interfaces/database/flow';
import { HandleType, Position } from '@xyflow/react';
import { createContext } from 'react';
import { Dispatch, SetStateAction, createContext } from 'react';
import { useAddNode } from './hooks/use-add-node';
import { useCacheChatLog } from './hooks/use-cache-chat-log';
import { useShowFormDrawer, useShowLogSheet } from './hooks/use-show-drawer';
@ -13,7 +15,11 @@ type AgentInstanceContextType = Pick<
ReturnType<typeof useAddNode>,
'addCanvasNode'
> &
Pick<ReturnType<typeof useShowFormDrawer>, 'showFormDrawer'>;
Pick<ReturnType<typeof useShowFormDrawer>, 'showFormDrawer'> & {
lastNode: INodeEvent | null;
currentSendLoading: boolean;
startButNotFinishedNodeIds: string[];
};
export const AgentInstanceContext = createContext<AgentInstanceContextType>(
{} as AgentInstanceContextType,
@ -22,7 +28,10 @@ export const AgentInstanceContext = createContext<AgentInstanceContextType>(
type AgentChatContextType = Pick<
ReturnType<typeof useShowLogSheet>,
'showLogSheet'
> & { setLastSendLoadingFunc: (loading: boolean, messageId: string) => void };
> & {
setLastSendLoadingFunc: (loading: boolean, messageId: string) => void;
setDerivedMessages: Dispatch<SetStateAction<IMessage[] | undefined>>;
};
export const AgentChatContext = createContext<AgentChatContextType>(
{} as AgentChatContextType,

View file

@ -55,7 +55,7 @@ const FormSheet = ({
<Sheet open={visible} modal={false}>
<SheetContent
className={cn('top-20 p-0 flex flex-col pb-20', {
'right-[620px]': chatVisible,
'right-[clamp(0px,34%,620px)]': chatVisible,
})}
closeIcon={false}
>

View file

@ -0,0 +1,88 @@
import {
INodeData,
INodeEvent,
MessageEventType,
} from '@/hooks/use-send-message';
import { IMessage } from '@/interfaces/database/chat';
import { useCallback, useMemo, useState } from 'react';
/**
 * Tracks which canvas nodes have started but not yet finished for the most
 * recent chat message.
 *
 * The caller pushes the chat's message list in through `setDerivedMessages`;
 * the id of the last message is then used to look up that message's node
 * events via `currentEventListWithoutMessageById`.
 *
 * @param currentEventListWithoutMessageById Returns the node events recorded
 *   for a given message id.
 * @returns
 *   - `lastNode`: the last distinct `NodeStarted` event, or `null` when no
 *     node has started.
 *   - `startButNotFinishedNodeIds`: component ids with a `NodeStarted` event
 *     and no `NodeFinished` event.
 *   - `filterFinishedNodeList`: returns the `data` payload of every
 *     `NodeFinished` event.
 *   - `setDerivedMessages`: setter for the tracked message list.
 */
export const useNodeLoading = ({
  currentEventListWithoutMessageById,
}: {
  currentEventListWithoutMessageById: (messageId: string) => INodeEvent[];
}) => {
  const [derivedMessages, setDerivedMessages] = useState<IMessage[]>();

  const lastMessageId = useMemo(
    () => derivedMessages?.[derivedMessages.length - 1]?.id,
    [derivedMessages],
  );

  // Normalise to an array once, so every consumer below (including the
  // exported `filterFinishedNodeList`) can call array methods safely even if
  // the lookup yields nothing for this message id.
  const currentEventListWithoutMessage = useMemo<INodeEvent[]>(() => {
    if (!lastMessageId) {
      return [];
    }
    return currentEventListWithoutMessageById(lastMessageId) ?? [];
  }, [currentEventListWithoutMessageById, lastMessageId]);

  // NodeStarted events, keeping only the first occurrence per component id.
  const startedNodeList = useMemo(() => {
    const seenIds = new Set<INodeData['component_id']>();
    return currentEventListWithoutMessage.filter((x) => {
      if (x.event !== MessageEventType.NodeStarted) {
        return false;
      }
      const componentId = x.data.component_id;
      if (seenIds.has(componentId)) {
        return false;
      }
      seenIds.add(componentId);
      return true;
    });
  }, [currentEventListWithoutMessage]);

  const filterFinishedNodeList = useCallback(() => {
    return currentEventListWithoutMessage
      .filter((x) => x.event === MessageEventType.NodeFinished)
      .map((x) => x.data);
  }, [currentEventListWithoutMessage]);

  // Explicit `null` (not `undefined`) when the list is empty, matching the
  // `INodeEvent | null` shape the agent context declares for this value.
  const lastNode = useMemo<INodeEvent | null>(
    () => startedNodeList[startedNodeList.length - 1] ?? null,
    [startedNodeList],
  );

  const startNodeIds = useMemo(
    () => startedNodeList.map((x) => x.data.component_id),
    [startedNodeList],
  );

  const finishNodeIds = useMemo(() => {
    // Nothing has started yet, so nothing can be reported as finished.
    if (!lastNode) {
      return [];
    }
    const finishedIds = filterFinishedNodeList().map(
      (x: INodeData) => x.component_id,
    );
    return Array.from(new Set(finishedIds));
  }, [lastNode, filterFinishedNodeList]);

  const startButNotFinishedNodeIds = useMemo(() => {
    const finished = new Set(finishNodeIds);
    return startNodeIds.filter((x) => !finished.has(x));
  }, [finishNodeIds, startNodeIds]);

  return {
    lastNode,
    startButNotFinishedNodeIds,
    filterFinishedNodeList,
    setDerivedMessages,
  };
};

View file

@ -26,7 +26,7 @@ export function LogSheet({
return (
<Sheet open onOpenChange={hideModal} modal={false}>
<SheetContent
className={cn('top-20 right-[620px]')}
className={cn('top-20 right-[clamp(0px,34%,620px)]')}
onInteractOutside={(e) => e.preventDefault()}
>
<SheetHeader>

View file

@ -0,0 +1,191 @@
import { useEffect, useMemo } from 'react';
import { ControllerRenderProps, useFormContext } from 'react-hook-form';
import { Checkbox } from '@/components/ui/checkbox';
import { Input } from '@/components/ui/input';
import { cn } from '@/lib/utils';
/* ---------------- Token Field ---------------- */
/**
 * Props for ConfluenceTokenField: react-hook-form's controller render props
 * plus the options below.
 */
export type ConfluenceTokenFieldProps = ControllerRenderProps & {
// Selects the input type ('token' renders a password input, 'username' a
// text input) and which built-in placeholder is used as the fallback.
fieldType: 'username' | 'token';
// Overrides the built-in placeholder when provided.
placeholder?: string;
// Forwarded to the underlying Input.
disabled?: boolean;
};
/**
 * Input for a Confluence credential.
 *
 * With `fieldType` 'token' the value is rendered in a password input,
 * otherwise in a plain text input. When no `placeholder` is supplied, a
 * built-in one matching the field type is shown. An empty/nullish value is
 * rendered as an empty string, and any remaining controller props are
 * forwarded to the Input.
 */
const ConfluenceTokenField = (props: ConfluenceTokenFieldProps) => {
  const { fieldType, value, onChange, placeholder, disabled, ...rest } = props;

  const isToken = fieldType === 'token';
  const fallbackPlaceholder = isToken
    ? 'Enter your Confluence access token'
    : 'Confluence username or email';

  return (
    <div className="flex w-full flex-col gap-2">
      <Input
        className="w-full"
        type={isToken ? 'password' : 'text'}
        value={value ?? ''}
        onChange={(event) => onChange(event.target.value)}
        placeholder={placeholder || fallbackPlaceholder}
        disabled={disabled}
        {...rest}
      />
    </div>
  );
};
/* ---------------- Indexing Mode Field ---------------- */
// The three indexing scopes the mode selector can switch between.
type ConfluenceIndexingMode = 'everything' | 'space' | 'page';
// The indexing-mode field takes react-hook-form's controller render props as-is.
export type ConfluenceIndexingModeFieldProps = ControllerRenderProps;
/**
 * Mode selector for Confluence indexing ('everything' | 'space' | 'page').
 *
 * The selected mode is the controlled field value and falls back to
 * 'everything' when empty; an effect also writes that default back through
 * `onChange`. The dependent inputs are read from and written to the
 * surrounding form context under `config.space`, `config.page_id` and
 * `config.index_recursively`.
 *
 * Switching mode clears the values that do not belong to the new mode:
 *   - everything: clears space, page id and the recursive flag
 *   - space:      clears page id and the recursive flag
 *   - page:       clears space
 */
export const ConfluenceIndexingModeField = (
  fieldProps: ConfluenceIndexingModeFieldProps,
) => {
  const { value, onChange, disabled } = fieldProps;
  const { watch, setValue } = useFormContext();

  const mode = useMemo<ConfluenceIndexingMode>(() => {
    const current = value as ConfluenceIndexingMode;
    return current || 'everything';
  }, [value]);

  const spaceKey = watch('config.space');
  const pageId = watch('config.page_id');
  const recursive = watch('config.index_recursively');

  useEffect(() => {
    if (value) {
      return;
    }
    onChange('everything');
  }, [value, onChange]);

  // Every write to a sibling config field marks it dirty and touched.
  const writeConfig = (name: string, next: string | boolean) =>
    setValue(name, next, { shouldDirty: true, shouldTouch: true });

  const handleModeChange = (nextMode?: string) => {
    const normalized = (nextMode || 'everything') as ConfluenceIndexingMode;
    onChange(normalized);

    switch (normalized) {
      case 'everything':
        writeConfig('config.space', '');
        writeConfig('config.page_id', '');
        writeConfig('config.index_recursively', false);
        break;
      case 'space':
        writeConfig('config.page_id', '');
        writeConfig('config.index_recursively', false);
        break;
      case 'page':
        writeConfig('config.space', '');
        break;
      default:
        break;
    }
  };

  const modeButtons = INDEX_MODE_OPTIONS.map((option) => {
    const isActive = option.value === mode;
    return (
      <button
        key={option.value}
        type="button"
        disabled={disabled}
        onClick={() => handleModeChange(option.value)}
        className={cn(
          'flex-1 rounded-lg border px-3 py-2 transition-all',
          'border-transparent bg-transparent text-text-secondary hover:border-border-button hover:bg-bg-card-secondary',
          isActive &&
            'border-border-button bg-background text-primary shadow-sm',
        )}
      >
        {option.label}
      </button>
    );
  });

  return (
    <div className="w-full rounded-lg border border-border-button bg-bg-card p-4 space-y-4">
      <div className="flex items-center gap-2 text-sm font-medium text-text-secondary">
        {modeButtons}
      </div>

      {mode === 'everything' ? (
        <p className="text-sm text-text-secondary">
          This connector will index all pages the provided credentials have
          access to.
        </p>
      ) : null}

      {mode === 'space' ? (
        <div className="space-y-2">
          <div className="text-sm font-semibold text-text-primary">
            Space Key
          </div>
          <Input
            className="w-full"
            value={spaceKey ?? ''}
            onChange={(event) => writeConfig('config.space', event.target.value)}
            placeholder="e.g. KB"
            disabled={disabled}
          />
          <p className="text-xs text-text-secondary">
            The Confluence space key to index.
          </p>
        </div>
      ) : null}

      {mode === 'page' ? (
        <div className="space-y-2">
          <div className="text-sm font-semibold text-text-primary">Page ID</div>
          <Input
            className="w-full"
            value={pageId ?? ''}
            onChange={(event) =>
              writeConfig('config.page_id', event.target.value)
            }
            placeholder="e.g. 123456"
            disabled={disabled}
          />
          <p className="text-xs text-text-secondary">
            The Confluence page ID to index.
          </p>
          <div className="flex items-center gap-2 pt-2">
            <Checkbox
              checked={Boolean(recursive)}
              onCheckedChange={(checked) =>
                writeConfig('config.index_recursively', Boolean(checked))
              }
              disabled={disabled}
            />
            <span className="text-sm text-text-secondary">
              Index child pages recursively
            </span>
          </div>
        </div>
      ) : null}
    </div>
  );
};
// Label/value pairs for the mode buttons rendered by ConfluenceIndexingModeField.
// Declared after the component that uses it; this works because the array is
// only read inside the component body (at render time), not at module load.
const INDEX_MODE_OPTIONS = [
{ label: 'Everything', value: 'everything' },
{ label: 'Space', value: 'space' },
{ label: 'Page', value: 'page' },
];
export default ConfluenceTokenField;

View file

@ -1,9 +1,9 @@
import { FormFieldType } from '@/components/dynamic-form';
import SvgIcon from '@/components/svg-icon';
import { t } from 'i18next';
import { ConfluenceIndexingModeField } from './component/confluence-token-field';
import GmailTokenField from './component/gmail-token-field';
import GoogleDriveTokenField from './component/google-drive-token-field';
export enum DataSourceKey {
CONFLUENCE = 'confluence',
S3 = 's3',
@ -230,12 +230,35 @@ export const DataSourceFormFields = {
required: false,
tooltip: t('setting.confluenceIsCloudTip'),
},
{
label: 'Index Method',
name: 'config.index_mode',
type: FormFieldType.Text, // keep as text so RHF registers it
required: false,
horizontal: true,
labelClassName: 'self-start pt-4',
render: (fieldProps) => <ConfluenceIndexingModeField {...fieldProps} />,
},
{
label: 'Space Key',
name: 'config.space',
type: FormFieldType.Text,
required: false,
tooltip: t('setting.confluenceSpaceKeyTip'),
hidden: true,
},
{
label: 'Page ID',
name: 'config.page_id',
type: FormFieldType.Text,
required: false,
hidden: true,
},
{
label: 'Index Recursively',
name: 'config.index_recursively',
type: FormFieldType.Checkbox,
required: false,
hidden: true,
},
],
[DataSourceKey.GOOGLE_DRIVE]: [