Merge branch 'main' into feature/websocket-streaming-api
This commit is contained in:
commit
82d621c111
54 changed files with 1009 additions and 557006 deletions
|
|
@ -10,11 +10,10 @@ WORKDIR /ragflow
|
|||
# Copy models downloaded via download_deps.py
|
||||
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
|
||||
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
|
||||
cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
|
||||
tar --exclude='.*' -cf - \
|
||||
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
|
||||
/huggingface.co/InfiniFlow/deepdoc \
|
||||
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
||||
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
||||
|
||||
# https://github.com/chrismattmann/tika-python
|
||||
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
|
||||
|
|
|
|||
|
|
@ -91,9 +91,6 @@ class Graph:
|
|||
def load(self):
|
||||
self.components = self.dsl["components"]
|
||||
cpn_nms = set([])
|
||||
for k, cpn in self.components.items():
|
||||
cpn_nms.add(cpn["obj"]["component_name"])
|
||||
|
||||
for k, cpn in self.components.items():
|
||||
cpn_nms.add(cpn["obj"]["component_name"])
|
||||
param = component_class(cpn["obj"]["component_name"] + "Param")()
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ import json
|
|||
import logging
|
||||
import os
|
||||
import re
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import deepcopy
|
||||
from functools import partial
|
||||
from typing import Any
|
||||
|
|
@ -30,8 +29,8 @@ from api.db.services.llm_service import LLMBundle
|
|||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.db.services.mcp_server_service import MCPServerService
|
||||
from common.connection_utils import timeout
|
||||
from rag.prompts.generator import next_step, COMPLETE_TASK, analyze_task, \
|
||||
citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
|
||||
from rag.prompts.generator import next_step_async, COMPLETE_TASK, analyze_task_async, \
|
||||
citation_prompt, reflect_async, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
|
||||
from common.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool
|
||||
from agent.component.llm import LLMParam, LLM
|
||||
|
||||
|
|
@ -154,96 +153,19 @@ class Agent(LLM, ToolBase):
|
|||
|
||||
return None
|
||||
|
||||
def _force_format_to_schema(self, text: str, schema_prompt: str) -> str:
|
||||
async def _force_format_to_schema_async(self, text: str, schema_prompt: str) -> str:
|
||||
fmt_msgs = [
|
||||
{"role": "system", "content": schema_prompt + "\nIMPORTANT: Output ONLY valid JSON. No markdown, no extra text."},
|
||||
{"role": "user", "content": text},
|
||||
]
|
||||
_, fmt_msgs = message_fit_in(fmt_msgs, int(self.chat_mdl.max_length * 0.97))
|
||||
return self._generate(fmt_msgs)
|
||||
return await self._generate_async(fmt_msgs)
|
||||
|
||||
def _invoke(self, **kwargs):
|
||||
return asyncio.run(self._invoke_async(**kwargs))
|
||||
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60)))
|
||||
def _invoke(self, **kwargs):
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
|
||||
if kwargs.get("user_prompt"):
|
||||
usr_pmt = ""
|
||||
if kwargs.get("reasoning"):
|
||||
usr_pmt += "\nREASONING:\n{}\n".format(kwargs["reasoning"])
|
||||
if kwargs.get("context"):
|
||||
usr_pmt += "\nCONTEXT:\n{}\n".format(kwargs["context"])
|
||||
if usr_pmt:
|
||||
usr_pmt += "\nQUERY:\n{}\n".format(str(kwargs["user_prompt"]))
|
||||
else:
|
||||
usr_pmt = str(kwargs["user_prompt"])
|
||||
self._param.prompts = [{"role": "user", "content": usr_pmt}]
|
||||
|
||||
if not self.tools:
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
return LLM._invoke(self, **kwargs)
|
||||
|
||||
prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
|
||||
output_schema = self._get_output_schema()
|
||||
schema_prompt = ""
|
||||
if output_schema:
|
||||
schema = json.dumps(output_schema, ensure_ascii=False, indent=2)
|
||||
schema_prompt = structured_output_prompt(schema)
|
||||
|
||||
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
|
||||
ex = self.exception_handler()
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]) and not output_schema:
|
||||
self.set_output("content", partial(self.stream_output_with_tools, prompt, msg, user_defined_prompt))
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
use_tools = []
|
||||
ans = ""
|
||||
for delta_ans, tk in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
ans += delta_ans
|
||||
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
logging.error(f"Agent._chat got error. response: {ans}")
|
||||
if self.get_exception_default_value():
|
||||
self.set_output("content", self.get_exception_default_value())
|
||||
else:
|
||||
self.set_output("_ERROR", ans)
|
||||
return
|
||||
|
||||
if output_schema:
|
||||
error = ""
|
||||
for _ in range(self._param.max_retries + 1):
|
||||
try:
|
||||
def clean_formated_answer(ans: str) -> str:
|
||||
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
ans = re.sub(r"^.*```json", "", ans, flags=re.DOTALL)
|
||||
return re.sub(r"```\n*$", "", ans, flags=re.DOTALL)
|
||||
obj = json_repair.loads(clean_formated_answer(ans))
|
||||
self.set_output("structured", obj)
|
||||
if use_tools:
|
||||
self.set_output("use_tools", use_tools)
|
||||
return obj
|
||||
except Exception:
|
||||
error = "The answer cannot be parsed as JSON"
|
||||
ans = self._force_format_to_schema(ans, schema_prompt)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
continue
|
||||
|
||||
self.set_output("_ERROR", error)
|
||||
return
|
||||
|
||||
self.set_output("content", ans)
|
||||
if use_tools:
|
||||
self.set_output("use_tools", use_tools)
|
||||
return ans
|
||||
|
||||
async def _invoke_async(self, **kwargs):
|
||||
"""
|
||||
Async entry: reuse existing logic but offload heavy sync parts via async wrappers to reduce blocking.
|
||||
"""
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
|
||||
|
|
@ -262,7 +184,7 @@ class Agent(LLM, ToolBase):
|
|||
if not self.tools:
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
return await asyncio.to_thread(LLM._invoke, self, **kwargs)
|
||||
return await LLM._invoke_async(self, **kwargs)
|
||||
|
||||
prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
|
||||
output_schema = self._get_output_schema()
|
||||
|
|
@ -274,13 +196,13 @@ class Agent(LLM, ToolBase):
|
|||
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
|
||||
ex = self.exception_handler()
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]) and not output_schema:
|
||||
self.set_output("content", partial(self.stream_output_with_tools_async, prompt, msg, user_defined_prompt))
|
||||
self.set_output("content", partial(self.stream_output_with_tools_async, prompt, deepcopy(msg), user_defined_prompt))
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
use_tools = []
|
||||
ans = ""
|
||||
async for delta_ans, tk in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt, schema_prompt=schema_prompt):
|
||||
async for delta_ans, _tk in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
ans += delta_ans
|
||||
|
|
@ -308,7 +230,7 @@ class Agent(LLM, ToolBase):
|
|||
return obj
|
||||
except Exception:
|
||||
error = "The answer cannot be parsed as JSON"
|
||||
ans = self._force_format_to_schema(ans, schema_prompt)
|
||||
ans = await self._force_format_to_schema_async(ans, schema_prompt)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
continue
|
||||
|
||||
|
|
@ -320,28 +242,6 @@ class Agent(LLM, ToolBase):
|
|||
self.set_output("use_tools", use_tools)
|
||||
return ans
|
||||
|
||||
def stream_output_with_tools(self, prompt, msg, user_defined_prompt={}):
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
answer_without_toolcall = ""
|
||||
use_tools = []
|
||||
for delta_ans,_ in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
|
||||
if self.check_if_canceled("Agent streaming"):
|
||||
return
|
||||
|
||||
if delta_ans.find("**ERROR**") >= 0:
|
||||
if self.get_exception_default_value():
|
||||
self.set_output("content", self.get_exception_default_value())
|
||||
yield self.get_exception_default_value()
|
||||
else:
|
||||
self.set_output("_ERROR", delta_ans)
|
||||
return
|
||||
answer_without_toolcall += delta_ans
|
||||
yield delta_ans
|
||||
|
||||
self.set_output("content", answer_without_toolcall)
|
||||
if use_tools:
|
||||
self.set_output("use_tools", use_tools)
|
||||
|
||||
async def stream_output_with_tools_async(self, prompt, msg, user_defined_prompt={}):
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
answer_without_toolcall = ""
|
||||
|
|
@ -365,64 +265,22 @@ class Agent(LLM, ToolBase):
|
|||
self.set_output("use_tools", use_tools)
|
||||
|
||||
async def _react_with_tools_streamly_async(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
|
||||
"""
|
||||
Async wrapper that offloads synchronous flow to a thread, yielding results without blocking the event loop.
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
queue: asyncio.Queue = asyncio.Queue()
|
||||
|
||||
def worker():
|
||||
try:
|
||||
for delta_ans, tk in self._react_with_tools_streamly(prompt, history, use_tools, user_defined_prompt, schema_prompt=schema_prompt):
|
||||
asyncio.run_coroutine_threadsafe(queue.put((delta_ans, tk)), loop)
|
||||
except Exception as e:
|
||||
asyncio.run_coroutine_threadsafe(queue.put(e), loop)
|
||||
finally:
|
||||
asyncio.run_coroutine_threadsafe(queue.put(StopAsyncIteration), loop)
|
||||
|
||||
await asyncio.to_thread(worker)
|
||||
|
||||
while True:
|
||||
item = await queue.get()
|
||||
if item is StopAsyncIteration:
|
||||
break
|
||||
if isinstance(item, Exception):
|
||||
raise item
|
||||
yield item
|
||||
|
||||
def _gen_citations(self, text):
|
||||
retrievals = self._canvas.get_reference()
|
||||
retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
|
||||
formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
|
||||
for delta_ans in self._generate_streamly([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
|
||||
{"role": "user", "content": text}
|
||||
]):
|
||||
yield delta_ans
|
||||
|
||||
def _react_with_tools_streamly(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
|
||||
token_count = 0
|
||||
tool_metas = self.tool_meta
|
||||
hist = deepcopy(history)
|
||||
last_calling = ""
|
||||
if len(hist) > 3:
|
||||
st = timer()
|
||||
user_request = full_question(messages=history, chat_mdl=self.chat_mdl)
|
||||
user_request = await asyncio.to_thread(full_question, messages=history, chat_mdl=self.chat_mdl)
|
||||
self.callback("Multi-turn conversation optimization", {}, user_request, elapsed_time=timer()-st)
|
||||
else:
|
||||
user_request = history[-1]["content"]
|
||||
|
||||
def use_tool(name, args):
|
||||
nonlocal hist, use_tools, token_count,last_calling,user_request
|
||||
async def use_tool_async(name, args):
|
||||
nonlocal hist, use_tools, last_calling
|
||||
logging.info(f"{last_calling=} == {name=}")
|
||||
# Summary of function calling
|
||||
#if all([
|
||||
# isinstance(self.toolcall_session.get_tool_obj(name), Agent),
|
||||
# last_calling,
|
||||
# last_calling != name
|
||||
#]):
|
||||
# self.toolcall_session.get_tool_obj(name).add2system_prompt(f"The chat history with other agents are as following: \n" + self.get_useful_memory(user_request, str(args["user_prompt"]),user_defined_prompt))
|
||||
last_calling = name
|
||||
tool_response = self.toolcall_session.tool_call(name, args)
|
||||
tool_response = await self.toolcall_session.tool_call_async(name, args)
|
||||
use_tools.append({
|
||||
"name": name,
|
||||
"arguments": args,
|
||||
|
|
@ -433,7 +291,7 @@ class Agent(LLM, ToolBase):
|
|||
|
||||
return name, tool_response
|
||||
|
||||
def complete():
|
||||
async def complete():
|
||||
nonlocal hist
|
||||
need2cite = self._param.cite and self._canvas.get_reference()["chunks"] and self._id.find("-->") < 0
|
||||
if schema_prompt:
|
||||
|
|
@ -451,7 +309,7 @@ class Agent(LLM, ToolBase):
|
|||
if len(hist) > 12:
|
||||
_hist = [hist[0], hist[1], *hist[-10:]]
|
||||
entire_txt = ""
|
||||
for delta_ans in self._generate_streamly(_hist):
|
||||
async for delta_ans in self._generate_streamly_async(_hist):
|
||||
if not need2cite or cited:
|
||||
yield delta_ans, 0
|
||||
entire_txt += delta_ans
|
||||
|
|
@ -460,7 +318,7 @@ class Agent(LLM, ToolBase):
|
|||
|
||||
st = timer()
|
||||
txt = ""
|
||||
for delta_ans in self._gen_citations(entire_txt):
|
||||
async for delta_ans in self._gen_citations_async(entire_txt):
|
||||
if self.check_if_canceled("Agent streaming"):
|
||||
return
|
||||
yield delta_ans, 0
|
||||
|
|
@ -475,14 +333,14 @@ class Agent(LLM, ToolBase):
|
|||
hist.append({"role": "user", "content": content})
|
||||
|
||||
st = timer()
|
||||
task_desc = analyze_task(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
|
||||
task_desc = await analyze_task_async(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
|
||||
self.callback("analyze_task", {}, task_desc, elapsed_time=timer()-st)
|
||||
for _ in range(self._param.max_rounds + 1):
|
||||
if self.check_if_canceled("Agent streaming"):
|
||||
return
|
||||
response, tk = next_step(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
|
||||
response, tk = await next_step_async(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
|
||||
# self.callback("next_step", {}, str(response)[:256]+"...")
|
||||
token_count += tk
|
||||
token_count += tk or 0
|
||||
hist.append({"role": "assistant", "content": response})
|
||||
try:
|
||||
functions = json_repair.loads(re.sub(r"```.*", "", response))
|
||||
|
|
@ -491,23 +349,24 @@ class Agent(LLM, ToolBase):
|
|||
for f in functions:
|
||||
if not isinstance(f, dict):
|
||||
raise TypeError(f"An object type should be returned, but `{f}`")
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
thr = []
|
||||
for func in functions:
|
||||
name = func["name"]
|
||||
args = func["arguments"]
|
||||
if name == COMPLETE_TASK:
|
||||
append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
|
||||
for txt, tkcnt in complete():
|
||||
yield txt, tkcnt
|
||||
return
|
||||
|
||||
thr.append(executor.submit(use_tool, name, args))
|
||||
tool_tasks = []
|
||||
for func in functions:
|
||||
name = func["name"]
|
||||
args = func["arguments"]
|
||||
if name == COMPLETE_TASK:
|
||||
append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
|
||||
async for txt, tkcnt in complete():
|
||||
yield txt, tkcnt
|
||||
return
|
||||
|
||||
st = timer()
|
||||
reflection = reflect(self.chat_mdl, hist, [th.result() for th in thr], user_defined_prompt)
|
||||
append_user_content(hist, reflection)
|
||||
self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
|
||||
tool_tasks.append(asyncio.create_task(use_tool_async(name, args)))
|
||||
|
||||
results = await asyncio.gather(*tool_tasks) if tool_tasks else []
|
||||
st = timer()
|
||||
reflection = await reflect_async(self.chat_mdl, hist, results, user_defined_prompt)
|
||||
append_user_content(hist, reflection)
|
||||
self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
|
||||
|
||||
except Exception as e:
|
||||
logging.exception(msg=f"Wrong JSON argument format in LLM ReAct response: {e}")
|
||||
|
|
@ -531,21 +390,17 @@ Respond immediately with your final comprehensive answer.
|
|||
return
|
||||
append_user_content(hist, final_instruction)
|
||||
|
||||
for txt, tkcnt in complete():
|
||||
async for txt, tkcnt in complete():
|
||||
yield txt, tkcnt
|
||||
|
||||
def get_useful_memory(self, goal: str, sub_goal:str, topn=3, user_defined_prompt:dict={}) -> str:
|
||||
# self.callback("get_useful_memory", {"topn": 3}, "...")
|
||||
mems = self._canvas.get_memory()
|
||||
rank = rank_memories(self.chat_mdl, goal, sub_goal, [summ for (user, assist, summ) in mems], user_defined_prompt)
|
||||
try:
|
||||
rank = json_repair.loads(re.sub(r"```.*", "", rank))[:topn]
|
||||
mems = [mems[r] for r in rank]
|
||||
return "\n\n".join([f"User: {u}\nAgent: {a}" for u, a,_ in mems])
|
||||
except Exception as e:
|
||||
logging.exception(e)
|
||||
|
||||
return "Error occurred."
|
||||
async def _gen_citations_async(self, text):
|
||||
retrievals = self._canvas.get_reference()
|
||||
retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
|
||||
formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
|
||||
async for delta_ans in self._generate_streamly_async([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
|
||||
{"role": "user", "content": text}
|
||||
]):
|
||||
yield delta_ans
|
||||
|
||||
def reset(self, only_output=False):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -327,7 +327,7 @@ class LLM(ComponentBase):
|
|||
self.set_output("content", answer)
|
||||
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
|
||||
def _invoke(self, **kwargs):
|
||||
async def _invoke_async(self, **kwargs):
|
||||
if self.check_if_canceled("LLM processing"):
|
||||
return
|
||||
|
||||
|
|
@ -338,22 +338,25 @@ class LLM(ComponentBase):
|
|||
|
||||
prompt, msg, _ = self._prepare_prompt_variables()
|
||||
error: str = ""
|
||||
output_structure=None
|
||||
output_structure = None
|
||||
try:
|
||||
output_structure = self._param.outputs['structured']
|
||||
output_structure = self._param.outputs["structured"]
|
||||
except Exception:
|
||||
pass
|
||||
if output_structure and isinstance(output_structure, dict) and output_structure.get("properties") and len(output_structure["properties"]) > 0:
|
||||
schema=json.dumps(output_structure, ensure_ascii=False, indent=2)
|
||||
prompt += structured_output_prompt(schema)
|
||||
for _ in range(self._param.max_retries+1):
|
||||
schema = json.dumps(output_structure, ensure_ascii=False, indent=2)
|
||||
prompt_with_schema = prompt + structured_output_prompt(schema)
|
||||
for _ in range(self._param.max_retries + 1):
|
||||
if self.check_if_canceled("LLM processing"):
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
_, msg_fit = message_fit_in(
|
||||
[{"role": "system", "content": prompt_with_schema}, *deepcopy(msg)],
|
||||
int(self.chat_mdl.max_length * 0.97),
|
||||
)
|
||||
error = ""
|
||||
ans = self._generate(msg)
|
||||
msg.pop(0)
|
||||
ans = await self._generate_async(msg_fit)
|
||||
msg_fit.pop(0)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
logging.error(f"LLM response error: {ans}")
|
||||
error = ans
|
||||
|
|
@ -362,7 +365,7 @@ class LLM(ComponentBase):
|
|||
self.set_output("structured", json_repair.loads(clean_formated_answer(ans)))
|
||||
return
|
||||
except Exception:
|
||||
msg.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
|
||||
msg_fit.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
|
||||
error = "The answer can't not be parsed as JSON"
|
||||
if error:
|
||||
self.set_output("_ERROR", error)
|
||||
|
|
@ -370,18 +373,23 @@ class LLM(ComponentBase):
|
|||
|
||||
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
|
||||
ex = self.exception_handler()
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]):
|
||||
self.set_output("content", partial(self._stream_output_async, prompt, msg))
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower() == "message" for cid in downstreams]) and not (
|
||||
ex and ex["goto"]
|
||||
):
|
||||
self.set_output("content", partial(self._stream_output_async, prompt, deepcopy(msg)))
|
||||
return
|
||||
|
||||
for _ in range(self._param.max_retries+1):
|
||||
error = ""
|
||||
for _ in range(self._param.max_retries + 1):
|
||||
if self.check_if_canceled("LLM processing"):
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
_, msg_fit = message_fit_in(
|
||||
[{"role": "system", "content": prompt}, *deepcopy(msg)], int(self.chat_mdl.max_length * 0.97)
|
||||
)
|
||||
error = ""
|
||||
ans = self._generate(msg)
|
||||
msg.pop(0)
|
||||
ans = await self._generate_async(msg_fit)
|
||||
msg_fit.pop(0)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
logging.error(f"LLM response error: {ans}")
|
||||
error = ans
|
||||
|
|
@ -395,23 +403,9 @@ class LLM(ComponentBase):
|
|||
else:
|
||||
self.set_output("_ERROR", error)
|
||||
|
||||
def _stream_output(self, prompt, msg):
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
answer = ""
|
||||
for ans in self._generate_streamly(msg):
|
||||
if self.check_if_canceled("LLM streaming"):
|
||||
return
|
||||
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
if self.get_exception_default_value():
|
||||
self.set_output("content", self.get_exception_default_value())
|
||||
yield self.get_exception_default_value()
|
||||
else:
|
||||
self.set_output("_ERROR", ans)
|
||||
return
|
||||
yield ans
|
||||
answer += ans
|
||||
self.set_output("content", answer)
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
|
||||
def _invoke(self, **kwargs):
|
||||
return asyncio.run(self._invoke_async(**kwargs))
|
||||
|
||||
def add_memory(self, user:str, assist:str, func_name: str, params: dict, results: str, user_defined_prompt:dict={}):
|
||||
summ = tool_call_summary(self.chat_mdl, func_name, params, results, user_defined_prompt)
|
||||
|
|
|
|||
|
|
@ -49,16 +49,19 @@ class LLMToolPluginCallSession(ToolCallSession):
|
|||
self.callback = callback
|
||||
|
||||
def tool_call(self, name: str, arguments: dict[str, Any]) -> Any:
|
||||
return asyncio.run(self.tool_call_async(name, arguments))
|
||||
|
||||
async def tool_call_async(self, name: str, arguments: dict[str, Any]) -> Any:
|
||||
assert name in self.tools_map, f"LLM tool {name} does not exist"
|
||||
st = timer()
|
||||
tool_obj = self.tools_map[name]
|
||||
if isinstance(tool_obj, MCPToolCallSession):
|
||||
resp = tool_obj.tool_call(name, arguments, 60)
|
||||
resp = await asyncio.to_thread(tool_obj.tool_call, name, arguments, 60)
|
||||
else:
|
||||
if hasattr(tool_obj, "invoke_async") and asyncio.iscoroutinefunction(tool_obj.invoke_async):
|
||||
resp = asyncio.run(tool_obj.invoke_async(**arguments))
|
||||
resp = await tool_obj.invoke_async(**arguments)
|
||||
else:
|
||||
resp = asyncio.run(asyncio.to_thread(tool_obj.invoke, **arguments))
|
||||
resp = await asyncio.to_thread(tool_obj.invoke, **arguments)
|
||||
|
||||
self.callback(name, arguments, resp, elapsed_time=timer()-st)
|
||||
return resp
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ from api.db.services.file_service import FileService
|
|||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.db.services.task_service import TaskService, queue_tasks
|
||||
from api.db.services.task_service import TaskService, queue_tasks, cancel_all_task_of
|
||||
from api.db.services.dialog_service import meta_filter, convert_conditions
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_result, server_error_response, token_required, \
|
||||
get_request_json
|
||||
|
|
@ -321,9 +321,7 @@ async def update_doc(tenant_id, dataset_id, document_id):
|
|||
try:
|
||||
if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
|
||||
return get_error_data_result(message="Database error (Document update)!")
|
||||
|
||||
settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
|
||||
return get_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
|
@ -350,12 +348,10 @@ async def update_doc(tenant_id, dataset_id, document_id):
|
|||
}
|
||||
renamed_doc = {}
|
||||
for key, value in doc.to_dict().items():
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_doc[new_key] = value
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(value)
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
|
||||
return get_result(data=renamed_doc)
|
||||
|
||||
|
|
@ -839,6 +835,8 @@ async def stop_parsing(tenant_id, dataset_id):
|
|||
return get_error_data_result(message=f"You don't own the document {id}.")
|
||||
if int(doc[0].progress) == 1 or doc[0].progress == 0:
|
||||
return get_error_data_result("Can't stop parsing document with progress at 0 or 1")
|
||||
# Send cancellation signal via Redis to stop background task
|
||||
cancel_all_task_of(id)
|
||||
info = {"run": "2", "progress": 0, "chunk_num": 0}
|
||||
DocumentService.update_by_id(id, info)
|
||||
settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)
|
||||
|
|
|
|||
|
|
@ -148,6 +148,7 @@ class Storage(Enum):
|
|||
AWS_S3 = 4
|
||||
OSS = 5
|
||||
OPENDAL = 6
|
||||
GCS = 7
|
||||
|
||||
# environment
|
||||
# ENV_STRONG_TEST_COUNT = "STRONG_TEST_COUNT"
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import rag.utils.ob_conn
|
|||
import rag.utils.opensearch_conn
|
||||
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
|
||||
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
|
||||
from rag.utils.gcs_conn import RAGFlowGCS
|
||||
from rag.utils.minio_conn import RAGFlowMinio
|
||||
from rag.utils.opendal_conn import OpenDALStorage
|
||||
from rag.utils.s3_conn import RAGFlowS3
|
||||
|
|
@ -109,6 +110,7 @@ MINIO = {}
|
|||
OB = {}
|
||||
OSS = {}
|
||||
OS = {}
|
||||
GCS = {}
|
||||
|
||||
DOC_MAXIMUM_SIZE: int = 128 * 1024 * 1024
|
||||
DOC_BULK_SIZE: int = 4
|
||||
|
|
@ -151,7 +153,8 @@ class StorageFactory:
|
|||
Storage.AZURE_SAS: RAGFlowAzureSasBlob,
|
||||
Storage.AWS_S3: RAGFlowS3,
|
||||
Storage.OSS: RAGFlowOSS,
|
||||
Storage.OPENDAL: OpenDALStorage
|
||||
Storage.OPENDAL: OpenDALStorage,
|
||||
Storage.GCS: RAGFlowGCS,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
|
@ -250,7 +253,7 @@ def init_settings():
|
|||
else:
|
||||
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
|
||||
|
||||
global AZURE, S3, MINIO, OSS
|
||||
global AZURE, S3, MINIO, OSS, GCS
|
||||
if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
|
||||
AZURE = get_base_config("azure", {})
|
||||
elif STORAGE_IMPL_TYPE == 'AWS_S3':
|
||||
|
|
@ -259,6 +262,8 @@ def init_settings():
|
|||
MINIO = decrypt_database_config(name="minio")
|
||||
elif STORAGE_IMPL_TYPE == 'OSS':
|
||||
OSS = get_base_config("oss", {})
|
||||
elif STORAGE_IMPL_TYPE == 'GCS':
|
||||
GCS = get_base_config("gcs", {})
|
||||
|
||||
global STORAGE_IMPL
|
||||
STORAGE_IMPL = StorageFactory.create(Storage[STORAGE_IMPL_TYPE])
|
||||
|
|
|
|||
|
|
@ -60,6 +60,8 @@ user_default_llm:
|
|||
# access_key: 'access_key'
|
||||
# secret_key: 'secret_key'
|
||||
# region: 'region'
|
||||
#gcs:
|
||||
# bucket: 'bridgtl-edm-d-bucket-ragflow'
|
||||
# oss:
|
||||
# access_key: 'access_key'
|
||||
# secret_key: 'secret_key'
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ from rag.prompts.generator import vision_llm_figure_describe_prompt
|
|||
|
||||
|
||||
def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
|
||||
if not figures_data_without_positions:
|
||||
return []
|
||||
return [
|
||||
(
|
||||
(figure_data[1], [figure_data[0]]),
|
||||
|
|
@ -35,7 +37,9 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
|
|||
]
|
||||
|
||||
|
||||
def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
|
||||
def vision_figure_parser_docx_wrapper(sections, tbls, callback=None,**kwargs):
|
||||
if not tbls:
|
||||
return []
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
|
||||
|
|
@ -53,6 +57,8 @@ def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
|
|||
|
||||
|
||||
def vision_figure_parser_pdf_wrapper(tbls, callback=None, **kwargs):
|
||||
if not tbls:
|
||||
return []
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ services:
|
|||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
|
|
@ -48,7 +48,7 @@ services:
|
|||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ services:
|
|||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
opensearch01:
|
||||
profiles:
|
||||
|
|
@ -67,12 +67,12 @@ services:
|
|||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
infinity:
|
||||
profiles:
|
||||
- infinity
|
||||
image: infiniflow/infinity:v0.6.8
|
||||
image: infiniflow/infinity:v0.6.10
|
||||
volumes:
|
||||
- infinity_data:/var/infinity
|
||||
- ./infinity_conf.toml:/infinity_conf.toml
|
||||
|
|
@ -94,7 +94,7 @@ services:
|
|||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
oceanbase:
|
||||
profiles:
|
||||
|
|
@ -119,7 +119,7 @@ services:
|
|||
timeout: 10s
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
sandbox-executor-manager:
|
||||
profiles:
|
||||
|
|
@ -147,7 +147,7 @@ services:
|
|||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
mysql:
|
||||
# mysql:5.7 linux/arm64 image is unavailable.
|
||||
|
|
@ -175,7 +175,7 @@ services:
|
|||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
|
||||
|
|
@ -191,7 +191,7 @@ services:
|
|||
- minio_data:/data
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
|
||||
interval: 10s
|
||||
|
|
@ -209,7 +209,7 @@ services:
|
|||
- redis_data:/data
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
|
||||
interval: 10s
|
||||
|
|
@ -228,7 +228,7 @@ services:
|
|||
networks:
|
||||
- ragflow
|
||||
command: ["--model-id", "/data/${TEI_MODEL}", "--auto-truncate"]
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
|
||||
tei-gpu:
|
||||
|
|
@ -249,7 +249,7 @@ services:
|
|||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
|
||||
kibana:
|
||||
|
|
@ -271,7 +271,7 @@ services:
|
|||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
|
||||
volumes:
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ services:
|
|||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
|
|
@ -39,7 +39,7 @@ services:
|
|||
# entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
|
||||
# networks:
|
||||
# - ragflow
|
||||
# restart: on-failure
|
||||
# restart: unless-stopped
|
||||
# # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
# extra_hosts:
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ services:
|
|||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
|
|
@ -94,7 +94,7 @@ services:
|
|||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
|
|
@ -120,7 +120,7 @@ services:
|
|||
# entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
|
||||
# networks:
|
||||
# - ragflow
|
||||
# restart: on-failure
|
||||
# restart: unless-stopped
|
||||
# # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
# extra_hosts:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
[general]
|
||||
version = "0.6.8"
|
||||
version = "0.6.10"
|
||||
time_zone = "utc-8"
|
||||
|
||||
[network]
|
||||
|
|
|
|||
|
|
@ -512,13 +512,16 @@ curl --request POST \
|
|||
- Maximum: `2048`
|
||||
- `"delimiter"`: `string`
|
||||
- Defaults to `"\n"`.
|
||||
- `"html4excel"`: `bool` Indicates whether to convert Excel documents into HTML format.
|
||||
- `"html4excel"`: `bool`
|
||||
- Whether to convert Excel documents into HTML format.
|
||||
- Defaults to `false`
|
||||
- `"layout_recognize"`: `string`
|
||||
- Defaults to `DeepDOC`
|
||||
- `"tag_kb_ids"`: `array<string>` refer to [Use tag set](https://ragflow.io/docs/dev/use_tag_sets)
|
||||
- Must include a list of dataset IDs, where each dataset is parsed using the Tag Chunking Method
|
||||
- `"task_page_size"`: `int` For PDF only.
|
||||
- `"tag_kb_ids"`: `array<string>`
|
||||
- IDs of datasets to be parsed using the Tag chunk method.
|
||||
- Before setting this, ensure a tag set is created and properly configured. For details, see [Use tag set](https://ragflow.io/docs/dev/use_tag_sets).
|
||||
- `"task_page_size"`: `int`
|
||||
- For PDFs only.
|
||||
- Defaults to `12`
|
||||
- Minimum: `1`
|
||||
- `"raptor"`: `object` RAPTOR-specific settings.
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
|
|||
repos = [
|
||||
"InfiniFlow/text_concat_xgb_v1.0",
|
||||
"InfiniFlow/deepdoc",
|
||||
"InfiniFlow/huqie",
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ ragflow:
|
|||
infinity:
|
||||
image:
|
||||
repository: infiniflow/infinity
|
||||
tag: v0.6.8
|
||||
tag: v0.6.10
|
||||
pullPolicy: IfNotPresent
|
||||
pullSecrets: []
|
||||
storage:
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ dependencies = [
|
|||
"html-text==0.6.2",
|
||||
"httpx[socks]>=0.28.1,<0.29.0",
|
||||
"huggingface-hub>=0.25.0,<0.26.0",
|
||||
"infinity-sdk==0.6.8",
|
||||
"infinity-sdk==0.6.10",
|
||||
"infinity-emb>=0.0.66,<0.0.67",
|
||||
"itsdangerous==2.1.2",
|
||||
"json-repair==0.35.0",
|
||||
|
|
|
|||
|
|
@ -86,9 +86,11 @@ class Pdf(PdfParser):
|
|||
|
||||
# (A) Add text
|
||||
for b in self.boxes:
|
||||
if not (from_page < b["page_number"] <= to_page + from_page):
|
||||
# b["page_number"] is relative to from_page; add from_page to get the global page number.
|
||||
global_page_num = b["page_number"] + from_page
|
||||
if not (from_page < global_page_num <= to_page + from_page):
|
||||
continue
|
||||
page_items[b["page_number"]].append({
|
||||
page_items[global_page_num].append({
|
||||
"top": b["top"],
|
||||
"x0": b["x0"],
|
||||
"text": b["text"],
|
||||
|
|
@ -100,7 +102,6 @@ class Pdf(PdfParser):
|
|||
if not positions:
|
||||
continue
|
||||
|
||||
# Handle content type (list vs str)
|
||||
if isinstance(content, list):
|
||||
final_text = "\n".join(content)
|
||||
elif isinstance(content, str):
|
||||
|
|
@ -109,10 +110,11 @@ class Pdf(PdfParser):
|
|||
final_text = str(content)
|
||||
|
||||
try:
|
||||
# Parse positions
|
||||
pn_index = positions[0][0]
|
||||
if isinstance(pn_index, list):
|
||||
pn_index = pn_index[0]
|
||||
|
||||
# pn_index in tbls is absolute page number
|
||||
current_page_num = int(pn_index) + 1
|
||||
except Exception as e:
|
||||
print(f"Error parsing position: {e}")
|
||||
|
|
|
|||
|
|
@ -343,7 +343,8 @@ def form_history(history, limit=-6):
|
|||
return context
|
||||
|
||||
|
||||
def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
|
||||
|
||||
async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
|
||||
tools_desc = tool_schema(tools_description)
|
||||
context = ""
|
||||
|
||||
|
|
@ -352,7 +353,7 @@ def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], use
|
|||
else:
|
||||
template = PROMPT_JINJA_ENV.from_string(ANALYZE_TASK_SYSTEM + "\n\n" + ANALYZE_TASK_USER)
|
||||
context = template.render(task=task_name, context=context, agent_prompt=prompt, tools_desc=tools_desc)
|
||||
kwd = chat_mdl.chat(context, [{"role": "user", "content": "Please analyze it."}])
|
||||
kwd = await _chat_async(chat_mdl, context, [{"role": "user", "content": "Please analyze it."}])
|
||||
if isinstance(kwd, tuple):
|
||||
kwd = kwd[0]
|
||||
kwd = re.sub(r"^.*</think>", "", kwd, flags=re.DOTALL)
|
||||
|
|
@ -361,13 +362,17 @@ def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], use
|
|||
return kwd
|
||||
|
||||
|
||||
async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(analyze_task, chat_mdl, prompt, task_name, tools_description, user_defined_prompts)
|
||||
async def _chat_async(chat_mdl, system: str, history: list, **kwargs):
    """Run one chat completion without blocking the event loop.

    If ``chat_mdl`` exposes a truthy ``async_chat`` attribute that is a
    coroutine function, it is awaited directly. Otherwise the blocking
    ``chat_mdl.chat`` is executed in a worker thread.

    Args:
        chat_mdl: Model wrapper providing ``chat`` and, optionally, ``async_chat``.
        system: System prompt, forwarded as the first positional argument.
        history: Message history, forwarded as the second positional argument.
        **kwargs: Extra keyword arguments forwarded unchanged to the chat call.

    Returns:
        Whatever the selected chat callable returns.
    """
    native = getattr(chat_mdl, "async_chat", None)
    has_native = bool(native) and asyncio.iscoroutinefunction(native)
    if not has_native:
        # No usable coroutine method: push the synchronous call to a thread.
        return await asyncio.to_thread(chat_mdl.chat, system, history, **kwargs)
    return await native(system, history, **kwargs)
|
||||
|
||||
|
||||
def next_step(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
|
||||
|
||||
async def next_step_async(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
|
||||
if not tools_description:
|
||||
return ""
|
||||
return "", 0
|
||||
desc = tool_schema(tools_description)
|
||||
template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("plan_generation", NEXT_STEP))
|
||||
user_prompt = "\nWhat's the next tool to call? If ready OR IMPOSSIBLE TO BE READY, then call `complete_task`."
|
||||
|
|
@ -376,18 +381,18 @@ def next_step(chat_mdl, history:list, tools_description: list[dict], task_desc,
|
|||
hist[-1]["content"] += user_prompt
|
||||
else:
|
||||
hist.append({"role": "user", "content": user_prompt})
|
||||
json_str = chat_mdl.chat(template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
|
||||
hist[1:], stop=["<|stop|>"])
|
||||
json_str = await _chat_async(
|
||||
chat_mdl,
|
||||
template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
|
||||
hist[1:],
|
||||
stop=["<|stop|>"],
|
||||
)
|
||||
tk_cnt = num_tokens_from_string(json_str)
|
||||
json_str = re.sub(r"^.*</think>", "", json_str, flags=re.DOTALL)
|
||||
return json_str, tk_cnt
|
||||
|
||||
|
||||
async def next_step_async(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(next_step, chat_mdl, history, tools_description, task_desc, user_defined_prompts)
|
||||
|
||||
|
||||
def reflect(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
|
||||
async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
|
||||
tool_calls = [{"name": p[0], "result": p[1]} for p in tool_call_res]
|
||||
goal = history[1]["content"]
|
||||
template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("reflection", REFLECT))
|
||||
|
|
@ -398,7 +403,7 @@ def reflect(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defi
|
|||
else:
|
||||
hist.append({"role": "user", "content": user_prompt})
|
||||
_, msg = message_fit_in(hist, chat_mdl.max_length)
|
||||
ans = chat_mdl.chat(msg[0]["content"], msg[1:])
|
||||
ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:])
|
||||
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
return """
|
||||
**Observation**
|
||||
|
|
@ -429,23 +434,15 @@ def tool_call_summary(chat_mdl, name: str, params: dict, result: str, user_defin
|
|||
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
|
||||
|
||||
def rank_memories(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
|
||||
async def rank_memories_async(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
|
||||
template = PROMPT_JINJA_ENV.from_string(RANK_MEMORY)
|
||||
system_prompt = template.render(goal=goal, sub_goal=sub_goal, results=[{"i": i, "content": s} for i,s in enumerate(tool_call_summaries)])
|
||||
user_prompt = " → rank: "
|
||||
_, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
|
||||
ans = chat_mdl.chat(msg[0]["content"], msg[1:], stop="<|stop|>")
|
||||
ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:], stop="<|stop|>")
|
||||
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
|
||||
|
||||
async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(reflect, chat_mdl, history, tool_call_res, user_defined_prompts)
|
||||
|
||||
|
||||
async def rank_memories_async(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(rank_memories, chat_mdl, goal, sub_goal, tool_call_summaries, user_defined_prompts)
|
||||
|
||||
|
||||
def gen_meta_filter(chat_mdl, meta_data:dict, query: str) -> dict:
|
||||
meta_data_structure = {}
|
||||
for key, values in meta_data.items():
|
||||
|
|
@ -514,7 +511,7 @@ def toc_index_extractor(toc:list[dict], content:str, chat_mdl):
|
|||
|
||||
The structure variable is the numeric system which represents the index of the hierarchy section in the table of contents. For example, the first section has structure index 1, the first subsection has structure index 1.1, the second subsection has structure index 1.2, etc.
|
||||
|
||||
The response should be in the following JSON format:
|
||||
The response should be in the following JSON format:
|
||||
[
|
||||
{
|
||||
"structure": <structure index, "x.x.x" or None> (string),
|
||||
|
|
@ -641,8 +638,8 @@ def toc_transformer(toc_pages, chat_mdl):
|
|||
|
||||
The `structure` is the numeric system which represents the index of the hierarchy section in the table of contents. For example, the first section has structure index 1, the first subsection has structure index 1.1, the second subsection has structure index 1.2, etc.
|
||||
The `title` is a short phrase or a several-words term.
|
||||
|
||||
The response should be in the following JSON format:
|
||||
|
||||
The response should be in the following JSON format:
|
||||
[
|
||||
{
|
||||
"structure": <structure index, "x.x.x" or None> (string),
|
||||
|
|
@ -667,7 +664,7 @@ def toc_transformer(toc_pages, chat_mdl):
|
|||
while not (if_complete == "yes"):
|
||||
prompt = f"""
|
||||
Your task is to continue the table of contents json structure, directly output the remaining part of the json structure.
|
||||
The response should be in the following JSON format:
|
||||
The response should be in the following JSON format:
|
||||
|
||||
The raw table of contents json structure is:
|
||||
{toc_content}
|
||||
|
|
@ -756,7 +753,7 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None):
|
|||
|
||||
for chunk in chunks_res:
|
||||
titles.extend(chunk.get("toc", []))
|
||||
|
||||
|
||||
# Filter out entries with title == -1
|
||||
prune = len(titles) > 512
|
||||
max_len = 12 if prune else 22
|
||||
|
|
|
|||
555629
rag/res/huqie.txt
555629
rag/res/huqie.txt
File diff suppressed because it is too large
Load diff
|
|
@ -157,11 +157,30 @@ class Confluence(SyncBase):
|
|||
from common.data_source.config import DocumentSource
|
||||
from common.data_source.interfaces import StaticCredentialsProvider
|
||||
|
||||
index_mode = (self.conf.get("index_mode") or "everything").lower()
|
||||
if index_mode not in {"everything", "space", "page"}:
|
||||
index_mode = "everything"
|
||||
|
||||
space = ""
|
||||
page_id = ""
|
||||
|
||||
index_recursively = False
|
||||
if index_mode == "space":
|
||||
space = (self.conf.get("space") or "").strip()
|
||||
if not space:
|
||||
raise ValueError("Space Key is required when indexing a specific Confluence space.")
|
||||
elif index_mode == "page":
|
||||
page_id = (self.conf.get("page_id") or "").strip()
|
||||
if not page_id:
|
||||
raise ValueError("Page ID is required when indexing a specific Confluence page.")
|
||||
index_recursively = bool(self.conf.get("index_recursively", False))
|
||||
|
||||
self.connector = ConfluenceConnector(
|
||||
wiki_base=self.conf["wiki_base"],
|
||||
space=self.conf.get("space", ""),
|
||||
is_cloud=self.conf.get("is_cloud", True),
|
||||
# page_id=self.conf.get("page_id", ""),
|
||||
space=space,
|
||||
page_id=page_id,
|
||||
index_recursively=index_recursively,
|
||||
)
|
||||
|
||||
credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"], connector_name=DocumentSource.CONFLUENCE, credential_json=self.conf["credentials"])
|
||||
|
|
|
|||
207
rag/utils/gcs_conn.py
Normal file
207
rag/utils/gcs_conn.py
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import logging
|
||||
import time
|
||||
import datetime
|
||||
from io import BytesIO
|
||||
from google.cloud import storage
|
||||
from google.api_core.exceptions import NotFound
|
||||
from common.decorator import singleton
|
||||
from common import settings
|
||||
|
||||
|
||||
@singleton
class RAGFlowGCS:
    """Object-storage backend that keeps every object in one GCS bucket.

    The ``bucket`` argument of the public methods is not a real GCS bucket:
    it is used as a folder prefix inside the single bucket named by
    ``settings.GCS["bucket"]``, so an object lives under the key
    ``<bucket>/<filename>``. The ``tenant_id`` arguments are accepted but not
    used by this backend.
    """

    def __init__(self):
        self.client = None
        self.bucket_name = None
        self.__open__()

    def __open__(self):
        """(Re)create the GCS client and re-read the configured bucket name.

        Failures are logged rather than raised; ``self.client`` is then left
        as ``None`` and later operations fail inside their own error handling.
        """
        # Drop the previous client first so a failed reconnect never leaves a
        # stale client behind.
        self.client = None
        try:
            self.client = storage.Client()
            self.bucket_name = settings.GCS["bucket"]
        except Exception:
            logging.exception("Fail to connect to GCS")

    def _get_blob_path(self, folder, filename):
        """Helper to construct the path: folder/filename"""
        if not folder:
            return filename
        return f"{folder}/{filename}"

    def _blob(self, folder, filename):
        """Build the blob object for ``folder/filename`` in the configured bucket."""
        bucket_obj = self.client.bucket(self.bucket_name)
        return bucket_obj.blob(self._get_blob_path(folder, filename))

    def health(self):
        """Check that the main bucket exists and accepts a small test upload.

        Returns:
            True when the bucket exists and the marker object was written,
            False otherwise (the reason is logged).
        """
        folder, fnm, binary = "ragflow-health", "health_check", b"_t@@@1"
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            if not bucket_obj.exists():
                logging.error(f"Health check failed: Main bucket '{self.bucket_name}' does not exist.")
                return False

            blob = bucket_obj.blob(self._get_blob_path(folder, fnm))
            blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
            return True
        except Exception as e:
            logging.exception(f"Health check failed: {e}")
            return False

    def put(self, bucket, fnm, binary, tenant_id=None):
        """Upload ``binary`` to ``bucket/fnm``, retrying up to three times.

        Returns:
            True on success; False if the main bucket is missing or every
            attempt failed.
        """
        attempts = 3
        for attempt in range(1, attempts + 1):
            try:
                blob = self._blob(bucket, fnm)
                blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
                return True
            except NotFound:
                # A missing main bucket will not fix itself; do not retry.
                logging.error(f"Fail to put: Main bucket {self.bucket_name} does not exist.")
                return False
            except Exception:
                logging.exception(f"Fail to put {bucket}/{fnm}:")
                self.__open__()
                # Only wait when another attempt follows; sleeping after the
                # last failure would just delay the caller.
                if attempt < attempts:
                    time.sleep(1)
        return False

    def rm(self, bucket, fnm, tenant_id=None):
        """Delete ``bucket/fnm``; a missing object is ignored, other errors are logged."""
        try:
            self._blob(bucket, fnm).delete()
        except NotFound:
            pass
        except Exception:
            logging.exception(f"Fail to remove {bucket}/{fnm}:")

    def get(self, bucket, filename, tenant_id=None):
        """Download ``bucket/filename`` in a single attempt.

        Returns:
            The object's bytes, or None when it does not exist or the download
            failed (the failure is logged).
        """
        try:
            return self._blob(bucket, filename).download_as_bytes()
        except NotFound:
            logging.warning(f"File not found {bucket}/{filename} in {self.bucket_name}")
            return None
        except Exception:
            logging.exception(f"Fail to get {bucket}/{filename}")
            # Reconnect so the next call starts with a fresh client. There is
            # no retry here, so there is nothing to wait for.
            self.__open__()
        return None

    def obj_exist(self, bucket, filename, tenant_id=None):
        """Return True if ``bucket/filename`` exists; False if not or on error."""
        try:
            return self._blob(bucket, filename).exists()
        except Exception:
            logging.exception(f"obj_exist {bucket}/{filename} got exception")
            return False

    def bucket_exists(self, bucket):
        """Return True if the configured main bucket exists.

        NOTE(review): ``bucket`` is not consulted; only the main bucket is
        checked. Confirm callers do not expect a per-folder answer.
        """
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            return bucket_obj.exists()
        except Exception:
            logging.exception(f"bucket_exist check for {self.bucket_name} got exception")
            return False

    def get_presigned_url(self, bucket, fnm, expires, tenant_id=None):
        """Create a V4 signed GET URL for ``bucket/fnm``, retrying up to ten times.

        Args:
            expires: Lifetime of the URL. A number is taken as seconds and
                converted to a ``timedelta``; any other value is passed to the
                signer unchanged.

        Returns:
            The signed URL, or None when every attempt failed.
        """
        attempts = 10
        for attempt in range(1, attempts + 1):
            try:
                blob = self._blob(bucket, fnm)

                expiration = expires
                # Accept float seconds as well as int seconds.
                if isinstance(expires, (int, float)):
                    expiration = datetime.timedelta(seconds=expires)

                return blob.generate_signed_url(
                    version="v4",
                    expiration=expiration,
                    method="GET",
                )
            except Exception:
                logging.exception(f"Fail to get_presigned {bucket}/{fnm}:")
                self.__open__()
                # Only wait when another attempt follows.
                if attempt < attempts:
                    time.sleep(1)
        return None

    def remove_bucket(self, bucket):
        """Delete every object stored under the ``bucket/`` prefix; errors are logged."""
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            prefix = f"{bucket}/"

            blobs = list(self.client.list_blobs(self.bucket_name, prefix=prefix))
            if blobs:
                bucket_obj.delete_blobs(blobs)
        except Exception:
            logging.exception(f"Fail to remove virtual bucket (folder) {bucket}")

    def copy(self, src_bucket, src_path, dest_bucket, dest_path):
        """Copy ``src_bucket/src_path`` to ``dest_bucket/dest_path`` inside the main bucket.

        Returns:
            True when the copy was made; False when the source object or the
            main bucket is missing, or on any other error (logged).
        """
        try:
            bucket_obj = self.client.bucket(self.bucket_name)

            src_blob_path = self._get_blob_path(src_bucket, src_path)
            dest_blob_path = self._get_blob_path(dest_bucket, dest_path)

            src_blob = bucket_obj.blob(src_blob_path)
            if not src_blob.exists():
                logging.error(f"Source object not found: {src_blob_path}")
                return False

            bucket_obj.copy_blob(src_blob, bucket_obj, dest_blob_path)
            return True
        except NotFound:
            logging.error(f"Copy failed: Main bucket {self.bucket_name} does not exist.")
            return False
        except Exception:
            logging.exception(f"Fail to copy {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
            return False

    def move(self, src_bucket, src_path, dest_bucket, dest_path):
        """Copy the object to its destination, then remove the source.

        Returns:
            True when the copy succeeded, False otherwise. ``rm`` logs its own
            failures and does not raise, so True does not guarantee the source
            was removed.
        """
        try:
            if self.copy(src_bucket, src_path, dest_bucket, dest_path):
                self.rm(src_bucket, src_path)
                return True
            logging.error(f"Copy failed, move aborted: {src_bucket}/{src_path}")
            return False
        except Exception:
            logging.exception(f"Fail to move {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
            return False
|
||||
|
|
@ -1,323 +0,0 @@
|
|||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""
|
||||
Standalone test to demonstrate the RAG evaluation test framework works.
|
||||
This test doesn't require RAGFlow dependencies.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock
|
||||
|
||||
|
||||
class TestEvaluationFrameworkDemo:
    """Smoke tests showing the evaluation test scaffolding runs using mocks only."""

    def test_basic_assertion(self):
        """A plain assertion executes."""
        assert 1 + 1 == 2

    def test_mock_evaluation_service(self):
        """A mocked create_dataset yields its canned result and is called once."""
        svc = Mock()
        svc.create_dataset.return_value = (True, "dataset_123")

        ok, new_id = svc.create_dataset(name="Test Dataset", kb_ids=["kb_1"])

        assert ok is True
        assert new_id == "dataset_123"
        svc.create_dataset.assert_called_once()

    def test_mock_test_case_addition(self):
        """A mocked add_test_case yields its canned result."""
        svc = Mock()
        svc.add_test_case.return_value = (True, "case_123")

        call_args = {
            "dataset_id": "dataset_123",
            "question": "Test question?",
            "reference_answer": "Test answer",
        }
        ok, new_case = svc.add_test_case(**call_args)

        assert ok is True
        assert new_case == "case_123"

    def test_mock_evaluation_run(self):
        """A mocked start_evaluation yields its canned result."""
        svc = Mock()
        svc.start_evaluation.return_value = (True, "run_123")

        call_args = {
            "dataset_id": "dataset_123",
            "dialog_id": "dialog_456",
            "user_id": "user_1",
        }
        ok, new_run = svc.start_evaluation(**call_args)

        assert ok is True
        assert new_run == "run_123"

    def test_mock_metrics_computation(self):
        """A mocked metrics call hands back the configured retrieval metrics."""
        svc = Mock()
        canned = {
            "precision": 0.85,
            "recall": 0.78,
            "f1_score": 0.81,
            "hit_rate": 1.0,
            "mrr": 0.9,
        }
        svc._compute_retrieval_metrics.return_value = canned

        got = svc._compute_retrieval_metrics(
            retrieved_ids=["chunk_1", "chunk_2", "chunk_3"],
            relevant_ids=["chunk_1", "chunk_2", "chunk_4"],
        )

        for key, want in (("precision", 0.85), ("recall", 0.78), ("f1_score", 0.81)):
            assert got[key] == want

    def test_mock_recommendations(self):
        """A mocked get_recommendations hands back the configured list."""
        svc = Mock()
        low_precision = {
            "issue": "Low Precision",
            "severity": "high",
            "suggestions": ["Increase similarity_threshold", "Enable reranking"],
        }
        svc.get_recommendations.return_value = [low_precision]

        recs = svc.get_recommendations("run_123")

        assert len(recs) == 1
        first = recs[0]
        assert first["issue"] == "Low Precision"
        assert len(first["suggestions"]) == 2

    @pytest.mark.parametrize("precision,recall,expected_f1", [
        (1.0, 1.0, 1.0),
        (0.8, 0.6, 0.69),
        (0.5, 0.5, 0.5),
        (0.0, 0.0, 0.0),
    ])
    def test_f1_score_calculation(self, precision, recall, expected_f1):
        """The harmonic-mean F1 matches the expected value within 0.01."""
        f1 = 0.0
        if precision + recall > 0:
            f1 = 2 * (precision * recall) / (precision + recall)

        assert abs(f1 - expected_f1) < 0.01

    def test_dataset_list_structure(self):
        """A mocked list_datasets hands back the configured page structure."""
        svc = Mock()
        page_payload = {
            "total": 3,
            "datasets": [
                {"id": "dataset_1", "name": "Dataset 1"},
                {"id": "dataset_2", "name": "Dataset 2"},
                {"id": "dataset_3", "name": "Dataset 3"},
            ],
        }
        svc.list_datasets.return_value = page_payload

        listing = svc.list_datasets(tenant_id="tenant_1", user_id="user_1", page=1, page_size=10)

        assert listing["total"] == 3
        assert len(listing["datasets"]) == 3
        assert listing["datasets"][0]["id"] == "dataset_1"

    def test_evaluation_run_status_flow(self):
        """Each configured status is reported back by the mocked run lookup."""
        svc = Mock()

        # Walk through the status progression one step at a time.
        for state in ["PENDING", "RUNNING", "COMPLETED"]:
            svc.get_run_results.return_value = {"run": {"id": "run_123", "status": state}}

            fetched = svc.get_run_results("run_123")
            assert fetched["run"]["status"] == state

    def test_bulk_import_success_count(self):
        """A mocked bulk import reports 8 successes and 2 failures out of 10."""
        svc = Mock()
        svc.import_test_cases.return_value = (8, 2)

        payload = [{"question": f"Q{i}"} for i in range(10)]
        n_ok, n_failed = svc.import_test_cases(dataset_id="dataset_123", cases=payload)

        assert n_ok == 8
        assert n_failed == 2
        assert n_ok + n_failed == 10

    def test_metrics_summary_aggregation(self):
        """Averages over three result rows match the expected values within 0.01."""
        results = [
            {"metrics": {"precision": 0.9, "recall": 0.8}, "execution_time": 1.2},
            {"metrics": {"precision": 0.8, "recall": 0.7}, "execution_time": 1.5},
            {"metrics": {"precision": 0.85, "recall": 0.75}, "execution_time": 1.3},
        ]
        count = len(results)

        mean_precision = sum(row["metrics"]["precision"] for row in results) / count
        mean_recall = sum(row["metrics"]["recall"] for row in results) / count
        mean_time = sum(row["execution_time"] for row in results) / count

        assert abs(mean_precision - 0.85) < 0.01
        assert abs(mean_recall - 0.75) < 0.01
        assert abs(mean_time - 1.33) < 0.01

    def test_recommendation_severity_levels(self):
        """Every recommendation built from a known severity carries a known severity."""
        severities = ["low", "medium", "high", "critical"]

        for level in severities:
            rec = {"issue": "Test Issue", "severity": level, "suggestions": ["Fix it"]}
            assert rec["severity"] in severities

    def test_empty_dataset_handling(self):
        """A mocked lookup on an empty dataset returns an empty list."""
        svc = Mock()
        svc.get_test_cases.return_value = []

        found = svc.get_test_cases("empty_dataset")

        assert len(found) == 0
        assert isinstance(found, list)

    def test_error_handling(self):
        """A mocked failure result carries False and a message mentioning 'empty'."""
        svc = Mock()
        svc.create_dataset.return_value = (False, "Dataset name cannot be empty")

        ok, message = svc.create_dataset(name="", kb_ids=[])

        assert ok is False
        assert "empty" in message.lower()

    def test_pagination_logic(self):
        """Page 2 of 50 items at 10 per page covers indices 10 up to 20."""
        total_items, page_size, page = 50, 10, 2

        start = page_size * (page - 1)
        end = min(total_items, start + page_size)

        assert end - start == 10
        assert start == 10
        assert end == 20
|
||||
|
||||
|
||||
class TestMetricsCalculations:
|
||||
"""Test metric calculation logic"""
|
||||
|
||||
def test_precision_calculation(self):
|
||||
"""Test precision calculation"""
|
||||
retrieved = {"chunk_1", "chunk_2", "chunk_3", "chunk_4"}
|
||||
relevant = {"chunk_1", "chunk_2", "chunk_5"}
|
||||
|
||||
precision = len(retrieved & relevant) / len(retrieved)
|
||||
|
||||
assert precision == 0.5 # 2 out of 4
|
||||
|
||||
def test_recall_calculation(self):
|
||||
"""Test recall calculation"""
|
||||
retrieved = {"chunk_1", "chunk_2", "chunk_3", "chunk_4"}
|
||||
relevant = {"chunk_1", "chunk_2", "chunk_5"}
|
||||
|
||||
recall = len(retrieved & relevant) / len(relevant)
|
||||
|
||||
assert abs(recall - 0.67) < 0.01 # 2 out of 3
|
||||
|
||||
def test_hit_rate_positive(self):
|
||||
"""Test hit rate when relevant chunk is found"""
|
||||
retrieved = {"chunk_1", "chunk_2", "chunk_3"}
|
||||
relevant = {"chunk_2", "chunk_4"}
|
||||
|
||||
hit_rate = 1.0 if (retrieved & relevant) else 0.0
|
||||
|
||||
assert hit_rate == 1.0
|
||||
|
||||
def test_hit_rate_negative(self):
|
||||
"""Test hit rate when no relevant chunk is found"""
|
||||
retrieved = {"chunk_1", "chunk_2", "chunk_3"}
|
||||
relevant = {"chunk_4", "chunk_5"}
|
||||
|
||||
hit_rate = 1.0 if (retrieved & relevant) else 0.0
|
||||
|
||||
assert hit_rate == 0.0
|
||||
|
||||
def test_mrr_calculation(self):
|
||||
"""Test MRR calculation"""
|
||||
retrieved_ids = ["chunk_1", "chunk_2", "chunk_3", "chunk_4"]
|
||||
relevant_ids = {"chunk_3", "chunk_5"}
|
||||
|
||||
mrr = 0.0
|
||||
for i, chunk_id in enumerate(retrieved_ids, 1):
|
||||
if chunk_id in relevant_ids:
|
||||
mrr = 1.0 / i
|
||||
break
|
||||
|
||||
assert abs(mrr - 0.33) < 0.01 # First relevant at position 3
|
||||
|
||||
|
||||
# Summary test
|
||||
def test_evaluation_framework_summary():
|
||||
"""
|
||||
Summary test to confirm all evaluation framework features work.
|
||||
This test verifies that:
|
||||
- Basic assertions work
|
||||
- Mocking works for all service methods
|
||||
- Metrics calculations are correct
|
||||
- Error handling works
|
||||
- Pagination logic works
|
||||
"""
|
||||
assert True, "Evaluation test framework is working correctly!"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
|
|
@ -1,557 +0,0 @@
|
|||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""
|
||||
Unit tests for RAG Evaluation Service
|
||||
|
||||
Tests cover:
|
||||
- Dataset management (CRUD operations)
|
||||
- Test case management
|
||||
- Evaluation execution
|
||||
- Metrics computation
|
||||
- Recommendations generation
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
class TestEvaluationDatasetManagement:
|
||||
"""Tests for evaluation dataset management"""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_evaluation_service(self):
|
||||
"""Create a mock EvaluationService"""
|
||||
with patch('api.db.services.evaluation_service.EvaluationService') as mock:
|
||||
yield mock
|
||||
|
||||
@pytest.fixture
|
||||
def sample_dataset_data(self):
|
||||
"""Sample dataset data for testing"""
|
||||
return {
|
||||
"name": "Customer Support QA",
|
||||
"description": "Test cases for customer support",
|
||||
"kb_ids": ["kb_123", "kb_456"],
|
||||
"tenant_id": "tenant_1",
|
||||
"user_id": "user_1"
|
||||
}
|
||||
|
||||
def test_create_dataset_success(self, mock_evaluation_service, sample_dataset_data):
|
||||
"""Test successful dataset creation"""
|
||||
mock_evaluation_service.create_dataset.return_value = (True, "dataset_123")
|
||||
|
||||
success, dataset_id = mock_evaluation_service.create_dataset(**sample_dataset_data)
|
||||
|
||||
assert success is True
|
||||
assert dataset_id == "dataset_123"
|
||||
mock_evaluation_service.create_dataset.assert_called_once()
|
||||
|
||||
def test_create_dataset_with_empty_name(self, mock_evaluation_service):
|
||||
"""Test dataset creation with empty name"""
|
||||
data = {
|
||||
"name": "",
|
||||
"description": "Test",
|
||||
"kb_ids": ["kb_123"],
|
||||
"tenant_id": "tenant_1",
|
||||
"user_id": "user_1"
|
||||
}
|
||||
|
||||
mock_evaluation_service.create_dataset.return_value = (False, "Dataset name cannot be empty")
|
||||
success, error = mock_evaluation_service.create_dataset(**data)
|
||||
|
||||
assert success is False
|
||||
assert "name" in error.lower() or "empty" in error.lower()
|
||||
|
||||
def test_create_dataset_with_empty_kb_ids(self, mock_evaluation_service):
|
||||
"""Test dataset creation with empty kb_ids"""
|
||||
data = {
|
||||
"name": "Test Dataset",
|
||||
"description": "Test",
|
||||
"kb_ids": [],
|
||||
"tenant_id": "tenant_1",
|
||||
"user_id": "user_1"
|
||||
}
|
||||
|
||||
mock_evaluation_service.create_dataset.return_value = (False, "kb_ids cannot be empty")
|
||||
success, error = mock_evaluation_service.create_dataset(**data)
|
||||
|
||||
assert success is False
|
||||
|
||||
def test_get_dataset_success(self, mock_evaluation_service):
|
||||
"""Test successful dataset retrieval"""
|
||||
expected_dataset = {
|
||||
"id": "dataset_123",
|
||||
"name": "Test Dataset",
|
||||
"kb_ids": ["kb_123"]
|
||||
}
|
||||
mock_evaluation_service.get_dataset.return_value = expected_dataset
|
||||
|
||||
dataset = mock_evaluation_service.get_dataset("dataset_123")
|
||||
|
||||
assert dataset is not None
|
||||
assert dataset["id"] == "dataset_123"
|
||||
|
||||
def test_get_dataset_not_found(self, mock_evaluation_service):
|
||||
"""Test getting non-existent dataset"""
|
||||
mock_evaluation_service.get_dataset.return_value = None
|
||||
|
||||
dataset = mock_evaluation_service.get_dataset("nonexistent")
|
||||
|
||||
assert dataset is None
|
||||
|
||||
def test_list_datasets(self, mock_evaluation_service):
|
||||
"""Test listing datasets"""
|
||||
expected_result = {
|
||||
"total": 2,
|
||||
"datasets": [
|
||||
{"id": "dataset_1", "name": "Dataset 1"},
|
||||
{"id": "dataset_2", "name": "Dataset 2"}
|
||||
]
|
||||
}
|
||||
mock_evaluation_service.list_datasets.return_value = expected_result
|
||||
|
||||
result = mock_evaluation_service.list_datasets(
|
||||
tenant_id="tenant_1",
|
||||
user_id="user_1",
|
||||
page=1,
|
||||
page_size=20
|
||||
)
|
||||
|
||||
assert result["total"] == 2
|
||||
assert len(result["datasets"]) == 2
|
||||
|
||||
def test_list_datasets_with_pagination(self, mock_evaluation_service):
|
||||
"""Test listing datasets with pagination"""
|
||||
mock_evaluation_service.list_datasets.return_value = {
|
||||
"total": 50,
|
||||
"datasets": [{"id": f"dataset_{i}"} for i in range(10)]
|
||||
}
|
||||
|
||||
result = mock_evaluation_service.list_datasets(
|
||||
tenant_id="tenant_1",
|
||||
user_id="user_1",
|
||||
page=2,
|
||||
page_size=10
|
||||
)
|
||||
|
||||
assert result["total"] == 50
|
||||
assert len(result["datasets"]) == 10
|
||||
|
||||
def test_update_dataset_success(self, mock_evaluation_service):
|
||||
"""Test successful dataset update"""
|
||||
mock_evaluation_service.update_dataset.return_value = True
|
||||
|
||||
success = mock_evaluation_service.update_dataset(
|
||||
"dataset_123",
|
||||
name="Updated Name",
|
||||
description="Updated Description"
|
||||
)
|
||||
|
||||
assert success is True
|
||||
|
||||
def test_update_dataset_not_found(self, mock_evaluation_service):
|
||||
"""Test updating non-existent dataset"""
|
||||
mock_evaluation_service.update_dataset.return_value = False
|
||||
|
||||
success = mock_evaluation_service.update_dataset(
|
||||
"nonexistent",
|
||||
name="Updated Name"
|
||||
)
|
||||
|
||||
assert success is False
|
||||
|
||||
def test_delete_dataset_success(self, mock_evaluation_service):
|
||||
"""Test successful dataset deletion"""
|
||||
mock_evaluation_service.delete_dataset.return_value = True
|
||||
|
||||
success = mock_evaluation_service.delete_dataset("dataset_123")
|
||||
|
||||
assert success is True
|
||||
|
||||
def test_delete_dataset_not_found(self, mock_evaluation_service):
|
||||
"""Test deleting non-existent dataset"""
|
||||
mock_evaluation_service.delete_dataset.return_value = False
|
||||
|
||||
success = mock_evaluation_service.delete_dataset("nonexistent")
|
||||
|
||||
assert success is False
|
||||
|
||||
|
||||
class TestEvaluationTestCaseManagement:
|
||||
"""Tests for test case management"""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_evaluation_service(self):
|
||||
"""Create a mock EvaluationService"""
|
||||
with patch('api.db.services.evaluation_service.EvaluationService') as mock:
|
||||
yield mock
|
||||
|
||||
@pytest.fixture
|
||||
def sample_test_case(self):
|
||||
"""Sample test case data"""
|
||||
return {
|
||||
"dataset_id": "dataset_123",
|
||||
"question": "How do I reset my password?",
|
||||
"reference_answer": "Click on 'Forgot Password' and follow the email instructions.",
|
||||
"relevant_doc_ids": ["doc_789"],
|
||||
"relevant_chunk_ids": ["chunk_101", "chunk_102"]
|
||||
}
|
||||
|
||||
def test_add_test_case_success(self, mock_evaluation_service, sample_test_case):
|
||||
"""Test successful test case addition"""
|
||||
mock_evaluation_service.add_test_case.return_value = (True, "case_123")
|
||||
|
||||
success, case_id = mock_evaluation_service.add_test_case(**sample_test_case)
|
||||
|
||||
assert success is True
|
||||
assert case_id == "case_123"
|
||||
|
||||
def test_add_test_case_with_empty_question(self, mock_evaluation_service):
|
||||
"""Test adding test case with empty question"""
|
||||
mock_evaluation_service.add_test_case.return_value = (False, "Question cannot be empty")
|
||||
|
||||
success, error = mock_evaluation_service.add_test_case(
|
||||
dataset_id="dataset_123",
|
||||
question=""
|
||||
)
|
||||
|
||||
assert success is False
|
||||
assert "question" in error.lower() or "empty" in error.lower()
|
||||
|
||||
def test_add_test_case_without_reference_answer(self, mock_evaluation_service):
|
||||
"""Test adding test case without reference answer (optional)"""
|
||||
mock_evaluation_service.add_test_case.return_value = (True, "case_123")
|
||||
|
||||
success, case_id = mock_evaluation_service.add_test_case(
|
||||
dataset_id="dataset_123",
|
||||
question="Test question",
|
||||
reference_answer=None
|
||||
)
|
||||
|
||||
assert success is True
|
||||
|
||||
def test_get_test_cases(self, mock_evaluation_service):
|
||||
"""Test getting all test cases for a dataset"""
|
||||
expected_cases = [
|
||||
{"id": "case_1", "question": "Question 1"},
|
||||
{"id": "case_2", "question": "Question 2"}
|
||||
]
|
||||
mock_evaluation_service.get_test_cases.return_value = expected_cases
|
||||
|
||||
cases = mock_evaluation_service.get_test_cases("dataset_123")
|
||||
|
||||
assert len(cases) == 2
|
||||
assert cases[0]["id"] == "case_1"
|
||||
|
||||
def test_get_test_cases_empty_dataset(self, mock_evaluation_service):
|
||||
"""Test getting test cases from empty dataset"""
|
||||
mock_evaluation_service.get_test_cases.return_value = []
|
||||
|
||||
cases = mock_evaluation_service.get_test_cases("dataset_123")
|
||||
|
||||
assert len(cases) == 0
|
||||
|
||||
def test_delete_test_case_success(self, mock_evaluation_service):
|
||||
"""Test successful test case deletion"""
|
||||
mock_evaluation_service.delete_test_case.return_value = True
|
||||
|
||||
success = mock_evaluation_service.delete_test_case("case_123")
|
||||
|
||||
assert success is True
|
||||
|
||||
def test_import_test_cases_success(self, mock_evaluation_service):
|
||||
"""Test bulk import of test cases"""
|
||||
cases = [
|
||||
{"question": "Question 1", "reference_answer": "Answer 1"},
|
||||
{"question": "Question 2", "reference_answer": "Answer 2"},
|
||||
{"question": "Question 3", "reference_answer": "Answer 3"}
|
||||
]
|
||||
mock_evaluation_service.import_test_cases.return_value = (3, 0)
|
||||
|
||||
success_count, failure_count = mock_evaluation_service.import_test_cases(
|
||||
"dataset_123",
|
||||
cases
|
||||
)
|
||||
|
||||
assert success_count == 3
|
||||
assert failure_count == 0
|
||||
|
||||
def test_import_test_cases_with_failures(self, mock_evaluation_service):
|
||||
"""Test bulk import with some failures"""
|
||||
cases = [
|
||||
{"question": "Question 1"},
|
||||
{"question": ""}, # Invalid
|
||||
{"question": "Question 3"}
|
||||
]
|
||||
mock_evaluation_service.import_test_cases.return_value = (2, 1)
|
||||
|
||||
success_count, failure_count = mock_evaluation_service.import_test_cases(
|
||||
"dataset_123",
|
||||
cases
|
||||
)
|
||||
|
||||
assert success_count == 2
|
||||
assert failure_count == 1
|
||||
|
||||
|
||||
class TestEvaluationExecution:
|
||||
"""Tests for evaluation execution"""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_evaluation_service(self):
|
||||
"""Create a mock EvaluationService"""
|
||||
with patch('api.db.services.evaluation_service.EvaluationService') as mock:
|
||||
yield mock
|
||||
|
||||
def test_start_evaluation_success(self, mock_evaluation_service):
|
||||
"""Test successful evaluation start"""
|
||||
mock_evaluation_service.start_evaluation.return_value = (True, "run_123")
|
||||
|
||||
success, run_id = mock_evaluation_service.start_evaluation(
|
||||
dataset_id="dataset_123",
|
||||
dialog_id="dialog_456",
|
||||
user_id="user_1"
|
||||
)
|
||||
|
||||
assert success is True
|
||||
assert run_id == "run_123"
|
||||
|
||||
def test_start_evaluation_with_invalid_dialog(self, mock_evaluation_service):
|
||||
"""Test starting evaluation with invalid dialog"""
|
||||
mock_evaluation_service.start_evaluation.return_value = (False, "Dialog not found")
|
||||
|
||||
success, error = mock_evaluation_service.start_evaluation(
|
||||
dataset_id="dataset_123",
|
||||
dialog_id="nonexistent",
|
||||
user_id="user_1"
|
||||
)
|
||||
|
||||
assert success is False
|
||||
assert "dialog" in error.lower()
|
||||
|
||||
def test_start_evaluation_with_custom_name(self, mock_evaluation_service):
|
||||
"""Test starting evaluation with custom name"""
|
||||
mock_evaluation_service.start_evaluation.return_value = (True, "run_123")
|
||||
|
||||
success, run_id = mock_evaluation_service.start_evaluation(
|
||||
dataset_id="dataset_123",
|
||||
dialog_id="dialog_456",
|
||||
user_id="user_1",
|
||||
name="My Custom Evaluation"
|
||||
)
|
||||
|
||||
assert success is True
|
||||
|
||||
def test_get_run_results(self, mock_evaluation_service):
|
||||
"""Test getting evaluation run results"""
|
||||
expected_results = {
|
||||
"run": {
|
||||
"id": "run_123",
|
||||
"status": "COMPLETED",
|
||||
"metrics_summary": {
|
||||
"avg_precision": 0.85,
|
||||
"avg_recall": 0.78
|
||||
}
|
||||
},
|
||||
"results": [
|
||||
{"case_id": "case_1", "metrics": {"precision": 0.9}},
|
||||
{"case_id": "case_2", "metrics": {"precision": 0.8}}
|
||||
]
|
||||
}
|
||||
mock_evaluation_service.get_run_results.return_value = expected_results
|
||||
|
||||
results = mock_evaluation_service.get_run_results("run_123")
|
||||
|
||||
assert results["run"]["id"] == "run_123"
|
||||
assert len(results["results"]) == 2
|
||||
|
||||
def test_get_run_results_not_found(self, mock_evaluation_service):
|
||||
"""Test getting results for non-existent run"""
|
||||
mock_evaluation_service.get_run_results.return_value = {}
|
||||
|
||||
results = mock_evaluation_service.get_run_results("nonexistent")
|
||||
|
||||
assert results == {}
|
||||
|
||||
|
||||
class TestEvaluationMetrics:
|
||||
"""Tests for metrics computation"""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_evaluation_service(self):
|
||||
"""Create a mock EvaluationService"""
|
||||
with patch('api.db.services.evaluation_service.EvaluationService') as mock:
|
||||
yield mock
|
||||
|
||||
def test_compute_retrieval_metrics_perfect_match(self, mock_evaluation_service):
|
||||
"""Test retrieval metrics with perfect match"""
|
||||
retrieved_ids = ["chunk_1", "chunk_2", "chunk_3"]
|
||||
relevant_ids = ["chunk_1", "chunk_2", "chunk_3"]
|
||||
|
||||
expected_metrics = {
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1_score": 1.0,
|
||||
"hit_rate": 1.0,
|
||||
"mrr": 1.0
|
||||
}
|
||||
mock_evaluation_service._compute_retrieval_metrics.return_value = expected_metrics
|
||||
|
||||
metrics = mock_evaluation_service._compute_retrieval_metrics(retrieved_ids, relevant_ids)
|
||||
|
||||
assert metrics["precision"] == 1.0
|
||||
assert metrics["recall"] == 1.0
|
||||
assert metrics["f1_score"] == 1.0
|
||||
|
||||
def test_compute_retrieval_metrics_partial_match(self, mock_evaluation_service):
|
||||
"""Test retrieval metrics with partial match"""
|
||||
retrieved_ids = ["chunk_1", "chunk_2", "chunk_4", "chunk_5"]
|
||||
relevant_ids = ["chunk_1", "chunk_2", "chunk_3"]
|
||||
|
||||
expected_metrics = {
|
||||
"precision": 0.5, # 2 out of 4 retrieved are relevant
|
||||
"recall": 0.67, # 2 out of 3 relevant were retrieved
|
||||
"f1_score": 0.57,
|
||||
"hit_rate": 1.0, # At least one relevant was retrieved
|
||||
"mrr": 1.0 # First retrieved is relevant
|
||||
}
|
||||
mock_evaluation_service._compute_retrieval_metrics.return_value = expected_metrics
|
||||
|
||||
metrics = mock_evaluation_service._compute_retrieval_metrics(retrieved_ids, relevant_ids)
|
||||
|
||||
assert metrics["precision"] < 1.0
|
||||
assert metrics["recall"] < 1.0
|
||||
assert metrics["hit_rate"] == 1.0
|
||||
|
||||
def test_compute_retrieval_metrics_no_match(self, mock_evaluation_service):
|
||||
"""Test retrieval metrics with no match"""
|
||||
retrieved_ids = ["chunk_4", "chunk_5", "chunk_6"]
|
||||
relevant_ids = ["chunk_1", "chunk_2", "chunk_3"]
|
||||
|
||||
expected_metrics = {
|
||||
"precision": 0.0,
|
||||
"recall": 0.0,
|
||||
"f1_score": 0.0,
|
||||
"hit_rate": 0.0,
|
||||
"mrr": 0.0
|
||||
}
|
||||
mock_evaluation_service._compute_retrieval_metrics.return_value = expected_metrics
|
||||
|
||||
metrics = mock_evaluation_service._compute_retrieval_metrics(retrieved_ids, relevant_ids)
|
||||
|
||||
assert metrics["precision"] == 0.0
|
||||
assert metrics["recall"] == 0.0
|
||||
assert metrics["hit_rate"] == 0.0
|
||||
|
||||
def test_compute_summary_metrics(self, mock_evaluation_service):
|
||||
"""Test summary metrics computation"""
|
||||
results = [
|
||||
{"metrics": {"precision": 0.9, "recall": 0.8}, "execution_time": 1.2},
|
||||
{"metrics": {"precision": 0.8, "recall": 0.7}, "execution_time": 1.5},
|
||||
{"metrics": {"precision": 0.85, "recall": 0.75}, "execution_time": 1.3}
|
||||
]
|
||||
|
||||
expected_summary = {
|
||||
"total_cases": 3,
|
||||
"avg_execution_time": 1.33,
|
||||
"avg_precision": 0.85,
|
||||
"avg_recall": 0.75
|
||||
}
|
||||
mock_evaluation_service._compute_summary_metrics.return_value = expected_summary
|
||||
|
||||
summary = mock_evaluation_service._compute_summary_metrics(results)
|
||||
|
||||
assert summary["total_cases"] == 3
|
||||
assert summary["avg_precision"] > 0.8
|
||||
|
||||
|
||||
class TestEvaluationRecommendations:
|
||||
"""Tests for configuration recommendations"""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_evaluation_service(self):
|
||||
"""Create a mock EvaluationService"""
|
||||
with patch('api.db.services.evaluation_service.EvaluationService') as mock:
|
||||
yield mock
|
||||
|
||||
def test_get_recommendations_low_precision(self, mock_evaluation_service):
|
||||
"""Test recommendations for low precision"""
|
||||
recommendations = [
|
||||
{
|
||||
"issue": "Low Precision",
|
||||
"severity": "high",
|
||||
"suggestions": [
|
||||
"Increase similarity_threshold",
|
||||
"Enable reranking"
|
||||
]
|
||||
}
|
||||
]
|
||||
mock_evaluation_service.get_recommendations.return_value = recommendations
|
||||
|
||||
recs = mock_evaluation_service.get_recommendations("run_123")
|
||||
|
||||
assert len(recs) > 0
|
||||
assert any("precision" in r["issue"].lower() for r in recs)
|
||||
|
||||
def test_get_recommendations_low_recall(self, mock_evaluation_service):
|
||||
"""Test recommendations for low recall"""
|
||||
recommendations = [
|
||||
{
|
||||
"issue": "Low Recall",
|
||||
"severity": "high",
|
||||
"suggestions": [
|
||||
"Increase top_k",
|
||||
"Lower similarity_threshold"
|
||||
]
|
||||
}
|
||||
]
|
||||
mock_evaluation_service.get_recommendations.return_value = recommendations
|
||||
|
||||
recs = mock_evaluation_service.get_recommendations("run_123")
|
||||
|
||||
assert len(recs) > 0
|
||||
assert any("recall" in r["issue"].lower() for r in recs)
|
||||
|
||||
def test_get_recommendations_slow_response(self, mock_evaluation_service):
|
||||
"""Test recommendations for slow response time"""
|
||||
recommendations = [
|
||||
{
|
||||
"issue": "Slow Response Time",
|
||||
"severity": "medium",
|
||||
"suggestions": [
|
||||
"Reduce top_k",
|
||||
"Optimize embedding model"
|
||||
]
|
||||
}
|
||||
]
|
||||
mock_evaluation_service.get_recommendations.return_value = recommendations
|
||||
|
||||
recs = mock_evaluation_service.get_recommendations("run_123")
|
||||
|
||||
assert len(recs) > 0
|
||||
assert any("response" in r["issue"].lower() or "slow" in r["issue"].lower() for r in recs)
|
||||
|
||||
def test_get_recommendations_no_issues(self, mock_evaluation_service):
|
||||
"""Test recommendations when metrics are good"""
|
||||
mock_evaluation_service.get_recommendations.return_value = []
|
||||
|
||||
recs = mock_evaluation_service.get_recommendations("run_123")
|
||||
|
||||
assert len(recs) == 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
163
uv.lock
generated
163
uv.lock
generated
|
|
@ -445,25 +445,25 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "bce-python-sdk"
|
||||
version = "0.9.54"
|
||||
version = "0.9.55"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "future" },
|
||||
{ name = "pycryptodome" },
|
||||
{ name = "six" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/c8/1c3bc30aa745ad4c3d073f150bddaf1d43ee6ee33f0b8ec60068494f511e/bce_python_sdk-0.9.54.tar.gz", hash = "sha256:f68026f40f11ea38ef445f50a7756009d5b703c7253438b138b30fb3b83be275", size = 275698, upload-time = "2025-11-27T02:28:50.24Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ae/f31ee3ccae94e1a07d8886a413f08c1581349e6cb45bf8b3c608fbf173e4/bce_python_sdk-0.9.55.tar.gz", hash = "sha256:bed63f8a0975f2e9daecf53417c3d5b803232ad87f35a0b16e25850710ce209c", size = 275733, upload-time = "2025-12-02T12:02:38.041Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/a7/b8806c8505bb830cc863837ef8b42695170dd9561605c61262250df066d3/bce_python_sdk-0.9.54-py3-none-any.whl", hash = "sha256:a084eee577931f15a55280a7401bea2474115989ee79ebbca131610bdce81c99", size = 390447, upload-time = "2025-11-27T02:28:48.603Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/01/1b13a627e5f0239f24b168138d9a948e876d4b387c03f59d31699578c960/bce_python_sdk-0.9.55-py3-none-any.whl", hash = "sha256:6045d19d783b548644cce50a2f41ef5242da6654fb91b2c21629f309ca6dbf4c", size = 390463, upload-time = "2025-12-02T12:02:36.417Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beartype"
|
||||
version = "0.22.7"
|
||||
version = "0.22.8"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/49/e28a77f8a3868b1c9ff6a030678e84de24c4783bae4c12cec9443cf8fb54/beartype-0.22.7.tar.gz", hash = "sha256:c7269855b71e32b7c9f0fc662baade752eb525107266e053338c2f6e8873826b", size = 1599627, upload-time = "2025-11-29T06:49:56.751Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/1d/794ae2acaa67c8b216d91d5919da2606c2bb14086849ffde7f5555f3a3a5/beartype-0.22.8.tar.gz", hash = "sha256:b19b21c9359722ee3f7cc433f063b3e13997b27ae8226551ea5062e621f61165", size = 1602262, upload-time = "2025-12-03T05:11:10.766Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/0c/a764253610513295b7f57904b91fae1d99c7afd1b16b6eaae06fdfb71fb5/beartype-0.22.7-py3-none-any.whl", hash = "sha256:e13430ac07c61fa4bc54d375970438aeb9aa47a482c529a6f438ce52e18e6f50", size = 1330771, upload-time = "2025-11-29T06:49:54.545Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/2a/fbcbf5a025d3e71ddafad7efd43e34ec4362f4d523c3c471b457148fb211/beartype-0.22.8-py3-none-any.whl", hash = "sha256:b832882d04e41a4097bab9f63e6992bc6de58c414ee84cba9b45b67314f5ab2e", size = 1331895, upload-time = "2025-12-03T05:11:08.373Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1910,11 +1910,11 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "fsspec"
|
||||
version = "2025.10.0"
|
||||
version = "2025.12.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/27/954057b0d1f53f086f681755207dda6de6c660ce133c829158e8e8fe7895/fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973", size = 309748, upload-time = "2025-12-03T15:23:42.687Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2022,16 +2022,21 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "google-auth"
|
||||
version = "2.41.1"
|
||||
version = "2.43.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "cachetools" },
|
||||
{ name = "pyasn1-modules" },
|
||||
{ name = "rsa" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/af/5129ce5b2f9688d2fa49b463e544972a7c82b0fdb50980dafee92e121d9f/google_auth-2.41.1.tar.gz", hash = "sha256:b76b7b1f9e61f0cb7e88870d14f6a94aeef248959ef6992670efee37709cbfd2", size = 292284, upload-time = "2025-09-30T22:51:26.363Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ef/66d14cf0e01b08d2d51ffc3c20410c4e134a1548fc246a6081eae585a4fe/google_auth-2.43.0.tar.gz", hash = "sha256:88228eee5fc21b62a1b5fe773ca15e67778cb07dc8363adcb4a8827b52d81483", size = 296359, upload-time = "2025-11-06T00:13:36.587Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl", hash = "sha256:754843be95575b9a19c604a848a41be03f7f2afd8c019f716dc1f51ee41c639d", size = 221302, upload-time = "2025-09-30T22:51:24.212Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/d1/385110a9ae86d91cc14c5282c61fe9f4dc41c0b9f7d423c6ad77038c4448/google_auth-2.43.0-py2.py3-none-any.whl", hash = "sha256:af628ba6fa493f75c7e9dbe9373d148ca9f4399b5ea29976519e0a3848eddd16", size = 223114, upload-time = "2025-11-06T00:13:35.209Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
requests = [
|
||||
{ name = "requests" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2049,15 +2054,15 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "google-auth-oauthlib"
|
||||
version = "1.2.3"
|
||||
version = "1.2.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-auth" },
|
||||
{ name = "requests-oauthlib" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/a6/c6336a6ceb682709a4aa39e2e6b5754a458075ca92359512b6cbfcb25ae3/google_auth_oauthlib-1.2.3.tar.gz", hash = "sha256:eb09e450d3cc789ecbc2b3529cb94a713673fd5f7a22c718ad91cf75aedc2ea4", size = 21265, upload-time = "2025-10-30T21:28:19.105Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/07/a54c100da461ffc5968457823fcc665a48fb4b875c68bcfecbfe24a10dbe/google_auth_oauthlib-1.2.3-py3-none-any.whl", hash = "sha256:7c0940e037677f25e71999607493640d071212e7f3c15aa0febea4c47a5a0680", size = 19184, upload-time = "2025-10-30T21:28:17.88Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2177,11 +2182,11 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "google-genai"
|
||||
version = "1.52.0"
|
||||
version = "1.53.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-auth", extra = ["requests"] },
|
||||
{ name = "httpx" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "requests" },
|
||||
|
|
@ -2189,9 +2194,9 @@ dependencies = [
|
|||
{ name = "typing-extensions" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/4e/0ad8585d05312074bb69711b2d81cfed69ce0ae441913d57bf169bed20a7/google_genai-1.52.0.tar.gz", hash = "sha256:a74e8a4b3025f23aa98d6a0f84783119012ca6c336fd68f73c5d2b11465d7fc5", size = 258743, upload-time = "2025-11-21T02:18:55.742Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/b3/36fbfde2e21e6d3bc67780b61da33632f495ab1be08076cf0a16af74098f/google_genai-1.53.0.tar.gz", hash = "sha256:938a26d22f3fd32c6eeeb4276ef204ef82884e63af9842ce3eac05ceb39cbd8d", size = 260102, upload-time = "2025-12-03T17:21:23.233Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/f2/97fefdd1ad1f3428321bac819ae7a83ccc59f6439616054736b7819fa56c/google_genai-1.53.0-py3-none-any.whl", hash = "sha256:65a3f99e5c03c372d872cda7419f5940e723374bb12a2f3ffd5e3e56e8eb2094", size = 262015, upload-time = "2025-12-03T17:21:21.934Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2776,7 +2781,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "infinity-sdk"
|
||||
version = "0.6.8"
|
||||
version = "0.6.10"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "datrie" },
|
||||
|
|
@ -2795,9 +2800,9 @@ dependencies = [
|
|||
{ name = "sqlglot", extra = ["rs"] },
|
||||
{ name = "thrift" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/2d/4b699d62202319e5cbbcb4a7d9e87a86dde7ba7c767d0af4ebbee3de8419/infinity_sdk-0.6.8.tar.gz", hash = "sha256:e91c1f6cdf2fa41bc615c72be2a9e981211bd05b34522c1d27f1b825b905b125", size = 72669, upload-time = "2025-12-02T05:09:29.377Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/e5/88fdcfe42835c5494a08f02b64762a98e04dae4ad49f7dfabac18ee01928/infinity_sdk-0.6.10.tar.gz", hash = "sha256:b55c296ca3b2c8c2f4568f359dd8a50772e9432f09b64667140e9804bf780436", size = 29502969, upload-time = "2025-12-04T02:42:17.882Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/08/59ed1261ee80d3b2c5a80313a013a94cae83ce90ff1da1ef488055944a7b/infinity_sdk-0.6.8-py3-none-any.whl", hash = "sha256:392f942a2073a5b545261dad9859b217c6a0331ede606c8894e7ae335f2ead5e", size = 81564, upload-time = "2025-12-02T05:09:27.784Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/99/8857ea0805bd83fe092f5dca914a31f9fcc731c3800264657bd3ba950a1d/infinity_sdk-0.6.10-py3-none-any.whl", hash = "sha256:8f605039ec73d1b05d219105fbabef186e0178fddbad058c2c06c4873be48651", size = 29722107, upload-time = "2025-12-04T02:42:04.101Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3077,7 +3082,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "langfuse"
|
||||
version = "3.10.3"
|
||||
version = "3.10.5"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "backoff" },
|
||||
|
|
@ -3091,9 +3096,9 @@ dependencies = [
|
|||
{ name = "requests" },
|
||||
{ name = "wrapt" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/03/c4316cb0a91cff97118c21b973b3089c2fe1bdbcad02f3623d6ac572e954/langfuse-3.10.3.tar.gz", hash = "sha256:69d6eaf573212f8cdc1cebd2d6b47f271bfe76c7eb5a3c5d6766bb0d9bf0004c", size = 226617, upload-time = "2025-12-01T18:01:02.607Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/21/dff0434290512484436bfa108e36f0adc3457eb4117767de70e76a411cac/langfuse-3.10.5.tar.gz", hash = "sha256:14eb767663f7e7480cd1cd1b3ca457022817c129e666efe97e5c80adb8c5aac0", size = 223142, upload-time = "2025-12-03T17:49:39.747Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/04/f07c2a23f2822f73f8576b1ba7348c014c4be65127384b4bee475f913f3b/langfuse-3.10.3-py3-none-any.whl", hash = "sha256:b9a2e6506f8f0923c2f4b8c9e3fa355231994197a17f75509a37f335660ce334", size = 399062, upload-time = "2025-12-01T18:01:00.688Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/6f/dc15775f82d38da62cd2015110f5802bb175a9ee731a4533fe2a0cdf75b6/langfuse-3.10.5-py3-none-any.whl", hash = "sha256:0223a64109a4293b9bd9b2e0e3229f53b75291cd96341e42cc3eba186973fcdb", size = 398888, upload-time = "2025-12-03T17:49:38.171Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4043,32 +4048,32 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "opentelemetry-api"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "importlib-metadata" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/0b/e5428c009d4d9af0515b0a8371a8aaae695371af291f45e702f7969dce6b/opentelemetry_api-1.39.0.tar.gz", hash = "sha256:6130644268c5ac6bdffaf660ce878f10906b3e789f7e2daa5e169b047a2933b9", size = 65763, upload-time = "2025-12-03T13:19:56.378Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/85/d831a9bc0a9e0e1a304ff3d12c1489a5fbc9bf6690a15dcbdae372bbca45/opentelemetry_api-1.39.0-py3-none-any.whl", hash = "sha256:3c3b3ca5c5687b1b5b37e5c5027ff68eacea8675241b29f13110a8ffbb8f0459", size = 66357, upload-time = "2025-12-03T13:19:33.043Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-exporter-otlp-proto-common"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-proto" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/cb/3a29ce606b10c76d413d6edd42d25a654af03e73e50696611e757d2602f3/opentelemetry_exporter_otlp_proto_common-1.39.0.tar.gz", hash = "sha256:a135fceed1a6d767f75be65bd2845da344dd8b9258eeed6bc48509d02b184409", size = 20407, upload-time = "2025-12-03T13:19:59.003Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/c6/215edba62d13a3948c718b289539f70e40965bc37fc82ecd55bb0b749c1a/opentelemetry_exporter_otlp_proto_common-1.39.0-py3-none-any.whl", hash = "sha256:3d77be7c4bdf90f1a76666c934368b8abed730b5c6f0547a2ec57feb115849ac", size = 18367, upload-time = "2025-12-03T13:19:36.906Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-exporter-otlp-proto-http"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "googleapis-common-protos" },
|
||||
|
|
@ -4079,48 +4084,48 @@ dependencies = [
|
|||
{ name = "requests" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/dc/1e9bf3f6a28e29eba516bc0266e052996d02bc7e92675f3cd38169607609/opentelemetry_exporter_otlp_proto_http-1.39.0.tar.gz", hash = "sha256:28d78fc0eb82d5a71ae552263d5012fa3ebad18dfd189bf8d8095ba0e65ee1ed", size = 17287, upload-time = "2025-12-03T13:20:01.134Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/46/e4a102e17205bb05a50dbf24ef0e92b66b648cd67db9a68865af06a242fd/opentelemetry_exporter_otlp_proto_http-1.39.0-py3-none-any.whl", hash = "sha256:5789cb1375a8b82653328c0ce13a054d285f774099faf9d068032a49de4c7862", size = 19639, upload-time = "2025-12-03T13:19:39.536Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-proto"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/b5/64d2f8c3393cd13ea2092106118f7b98461ba09333d40179a31444c6f176/opentelemetry_proto-1.39.0.tar.gz", hash = "sha256:c1fa48678ad1a1624258698e59be73f990b7fc1f39e73e16a9d08eef65dd838c", size = 46153, upload-time = "2025-12-03T13:20:08.729Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/4d/d500e1862beed68318705732d1976c390f4a72ca8009c4983ff627acff20/opentelemetry_proto-1.39.0-py3-none-any.whl", hash = "sha256:1e086552ac79acb501485ff0ce75533f70f3382d43d0a30728eeee594f7bf818", size = 72534, upload-time = "2025-12-03T13:19:50.251Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-sdk"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-api" },
|
||||
{ name = "opentelemetry-semantic-conventions" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/e3/7cd989003e7cde72e0becfe830abff0df55c69d237ee7961a541e0167833/opentelemetry_sdk-1.39.0.tar.gz", hash = "sha256:c22204f12a0529e07aa4d985f1bca9d6b0e7b29fe7f03e923548ae52e0e15dde", size = 171322, upload-time = "2025-12-03T13:20:09.651Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/b4/2adc8bc83eb1055ecb592708efb6f0c520cc2eb68970b02b0f6ecda149cf/opentelemetry_sdk-1.39.0-py3-none-any.whl", hash = "sha256:90cfb07600dfc0d2de26120cebc0c8f27e69bf77cd80ef96645232372709a514", size = 132413, upload-time = "2025-12-03T13:19:51.364Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-semantic-conventions"
|
||||
version = "0.59b0"
|
||||
version = "0.60b0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-api" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/0e/176a7844fe4e3cb5de604212094dffaed4e18b32f1c56b5258bcbcba85c2/opentelemetry_semantic_conventions-0.60b0.tar.gz", hash = "sha256:227d7aa73cbb8a2e418029d6b6465553aa01cf7e78ec9d0bc3255c7b3ac5bf8f", size = 137935, upload-time = "2025-12-03T13:20:12.395Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/56/af0306666f91bae47db14d620775604688361f0f76a872e0005277311131/opentelemetry_semantic_conventions-0.60b0-py3-none-any.whl", hash = "sha256:069530852691136018087b52688857d97bba61cd641d0f8628d2d92788c4f78a", size = 219981, upload-time = "2025-12-03T13:19:53.585Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5683,7 +5688,7 @@ requires-dist = [
|
|||
{ name = "huggingface-hub", specifier = ">=0.25.0,<0.26.0" },
|
||||
{ name = "imageio-ffmpeg", specifier = ">=0.6.0" },
|
||||
{ name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" },
|
||||
{ name = "infinity-sdk", specifier = "==0.6.8" },
|
||||
{ name = "infinity-sdk", specifier = "==0.6.10" },
|
||||
{ name = "itsdangerous", specifier = "==2.1.2" },
|
||||
{ name = "jira", specifier = "==3.10.5" },
|
||||
{ name = "json-repair", specifier = "==0.35.0" },
|
||||
|
|
@ -6712,11 +6717,11 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "sqlglot"
|
||||
version = "28.0.0"
|
||||
version = "28.1.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/8d/9ce5904aca760b81adf821c77a1dcf07c98f9caaa7e3b5c991c541ff89d2/sqlglot-28.0.0.tar.gz", hash = "sha256:cc9a651ef4182e61dac58aa955e5fb21845a5865c6a4d7d7b5a7857450285ad4", size = 5520798, upload-time = "2025-11-17T10:34:57.016Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/49/cda1fc4e610ed5764de2842bb2f362f4aba267b4a7d05a3a217a25b39004/sqlglot-28.1.0.tar.gz", hash = "sha256:a3ef7344359667b51cf95e840aac70a49f847602c61c9fbaeb847f74f7877fe1", size = 5546281, upload-time = "2025-12-02T16:52:28.387Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/6d/86de134f40199105d2fee1b066741aa870b3ce75ee74018d9c8508bbb182/sqlglot-28.0.0-py3-none-any.whl", hash = "sha256:ac1778e7fa4812f4f7e5881b260632fc167b00ca4c1226868891fb15467122e4", size = 536127, upload-time = "2025-11-17T10:34:55.192Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/e8/bd016214348f65ba31107c1b81af70fc7662d96758052d5d59b516fd3858/sqlglot-28.1.0-py3-none-any.whl", hash = "sha256:2a895a31666ba947c686caa980624c82bcd0e6fdf59b4fdb9e47108bd092d1ac", size = 547889, upload-time = "2025-12-02T16:52:26.019Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
|
|
@ -6726,40 +6731,40 @@ rs = [
|
|||
|
||||
[[package]]
|
||||
name = "sqlglotrs"
|
||||
version = "0.7.3"
|
||||
version = "0.8.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/5a/46d8efeda45be6ce1c630229455f000cafedea6129b47e6cfab39ff462f5/sqlglotrs-0.7.3.tar.gz", hash = "sha256:caadc572c8a194f99d6ba44d02f9ada0110e3d47cca3330c81f4aa608f1143eb", size = 15888, upload-time = "2025-10-13T06:33:57.322Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/37/118f24c367fde662e6c1181327dc9c16d08914108904c69bac3a6ba12c52/sqlglotrs-0.8.0.tar.gz", hash = "sha256:2b9a23c580d82be2388ee23496230cfc667f280ed0ed7eaa099d0da8d718cbf2", size = 15706, upload-time = "2025-12-02T16:58:38.197Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/95/f08e01f54e521a286fcd9f7a8bdd178eabcddd9dbc6d6c15dc983c7be8dd/sqlglotrs-0.7.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7acc6dba37af53d9cf1e3217fdd719878dbfaaf2a578ad7b3fbc07ef9dadd035", size = 314621, upload-time = "2025-10-13T06:33:48.917Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/7d/01a5db15e413ab587816448f1222286d3a10f0465954d21f5d2915aaeed5/sqlglotrs-0.7.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cbfb42071422afbd7376d70b93a969e86fb74752efe98dd66ee6d2ae27a9665", size = 300189, upload-time = "2025-10-13T06:33:40.963Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/21/94d1fb647a394afcb09a9174f7bff078452bb956e6898093dd9ee459ef2b/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07500421de9dea8dfc0cd6769145df754178fc2ae5a3692bdbf5d37aebc0712a", size = 332771, upload-time = "2025-10-13T06:32:45.992Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/d1/ccade8e794304c925e9b94e1d7bff4c56896f571a291a03bfd96048c4a0f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:792eb179a742d7d72d1d47c9a50e073078f0133e9191bd07920945dcc9170844", size = 342960, upload-time = "2025-10-13T06:32:55.493Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/2f/2ff3cfe7d91ac3762100e511c4eff0c98824970d7c27e18e88c44a4d4567/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4c3849992e33e47403c2517d464564e4b4cf6a080ad761141504e271ab2c7cd", size = 487268, upload-time = "2025-10-13T06:33:13.784Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/d7/a95fbdd26f20b7bd5781bb5a4c51616fdd59f1c521010f668ffd54e59f5d/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:016f51409ed3d87c33ca5a31dd6766e75a809909e418a0ffd2965e0ae7b84a7b", size = 365853, upload-time = "2025-10-13T06:33:23.415Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/7a/5d50d0b1167c79a509957d58a6bf9f6450f894e0bc233987cb85ccaec50f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94dd711ea2ba76e664dab3e7f7b08cb5517cf5164fd94a552598acfd1f6df59a", size = 343697, upload-time = "2025-10-13T06:33:32.542Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/89/85acbd412a5c7ef39ee5a96f5be28d6d38bce2c4521a264c747361b4c021/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:517198977f3baece667513326e42545b00b2878719922c58fcbfa21553f1338d", size = 363446, upload-time = "2025-10-13T06:33:03.995Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/4d/0a04f29731b6fda327bd11495c143ce70d1a7446b22440a32d8571408a06/sqlglotrs-0.7.3-cp310-cp310-win32.whl", hash = "sha256:1e9121ef3a64dc7d18e500e5e93df458a9bb6f4111b8f8569d5e4f8db21e61d2", size = 183997, upload-time = "2025-10-13T06:33:58.579Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/16/0e95fa77409da059c951c6be11d4d73311c60bb5ed82f1d40a4afc9a1aa9/sqlglotrs-0.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:48fd7e9efef56331e1ef7402b6d65113c087da1cfe2ef80d143ee62046d49056", size = 195923, upload-time = "2025-10-13T06:34:06.676Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/41/fcd87de298b562947cb2592feb9df5794886a8fa24eab8a080a552aa0e4d/sqlglotrs-0.7.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f2144fc8e472de2b165665c1370e7f0ca7f9400f60ca5e78c7aedbb3233bc8d7", size = 314465, upload-time = "2025-10-13T06:33:50.219Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/81/22cf241e22f364c414d57893fad9cfea869f8866189e75575a3862f1d329/sqlglotrs-0.7.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93cb74928b205da3f29f2b9c728d2c6656ad30e1ef500560f6c851bca2129fbc", size = 300129, upload-time = "2025-10-13T06:33:42.205Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/90/4e4220f8605c6fbca77dfad2052cdebf195099c99fd0684723677dcbf091/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a918137bacfa31529802063e349a99d5352f74c914beceb14689cd2864c5e4d0", size = 332735, upload-time = "2025-10-13T06:32:48.095Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/35/abe3cb6aa197b5193fcb446ab69465b5927e09e281b2c05f4e12249fd335/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c3fd0edbd957d136c67919ead10c90d832da1aedbbedc6da899d173fe78bf600", size = 342779, upload-time = "2025-10-13T06:32:56.782Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/71/670ad31f4dbfe594592a1992c4e7a62003dc47dffb15d96b2fec4137f933/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a361a1dd8c55fbc57f81db738658141cab723509cc1b3edcc871bccfbba0cfb", size = 487344, upload-time = "2025-10-13T06:33:15.095Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/73/86e46b762b615c7cdec489e4b0670d2a04ea6fab0c0be30a5756e95f108f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c698af6379475c243a8f190845bf1d1267a2c9867011a4567d5cfdcc5b0eb094", size = 366062, upload-time = "2025-10-13T06:33:25.183Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/07/b4dd7315df7d975c4b82d09106eb73ea2ee8f3734f764889913636e9d68c/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d63ed29058c56f153912c90811d8af1706d81f0c759883baeb21acb6322969", size = 343642, upload-time = "2025-10-13T06:33:33.826Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/84/2e834fc665236ef6b0fced14d75c8e9eb0db471d96fde539d8c37ce3a10f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4e19dee6dc46c4d84c556ae456fa0c6400edb157528fd369670b3d041b54ef21", size = 363731, upload-time = "2025-10-13T06:33:05.913Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/db/b7063b1240a1c39bc5627880dbb80c9e3f7b5548a17962d3a6bf98239171/sqlglotrs-0.7.3-cp311-cp311-win32.whl", hash = "sha256:f1276d0f02eaefbdd149b614f6c21fb9be372d7e1137f19c3d5f9e50662367b3", size = 183607, upload-time = "2025-10-13T06:33:59.858Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/98/e9cb2b3dd4abb34d2ae71747f113bf12f741a86fa29e661f1f09ba8376d0/sqlglotrs-0.7.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccf05fc6e89523cf5819982fab12b8fe07a9656dbb5356fc4b56b562e734c202", size = 196050, upload-time = "2025-10-13T06:34:07.921Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/3f/3b059058e198b2fb6612d0ddaad5431a796d7081d40b21f12273ea1b26dc/sqlglotrs-0.7.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2e7be55bf719b5ebdc344a996b6d27b9a0ba9bae0a09462900805e2f7dc4dca5", size = 310987, upload-time = "2025-10-13T06:33:51.874Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/b6/0058b2fe4f4813d9f3280d65ace97a637e8edd152be2a13bb1782c5c2eff/sqlglotrs-0.7.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6fef415993e1843201a57916f310b49e79669db379ff38094161fa93be2ffdf2", size = 296829, upload-time = "2025-10-13T06:33:43.838Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/a8/35c593b03bf498876aea68ea944a7e7bb9cf648e68984f55795181c928dd/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e980354e576e852c53e0bb5444b04ebb6459054074bce8012cc3385dd3d116ed", size = 332313, upload-time = "2025-10-13T06:32:49.343Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/bc/534e21a233846d33d6b55100485bf1844d301b0b75deded5310ef9cd171f/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1444b260c040cc80697956629f3fd3adece0bdb4f83bae22cd618ca3f18c4de8", size = 342309, upload-time = "2025-10-13T06:32:58.031Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/63/1d7bd7de87f01adb43cd1710d3fd5b9d5b0b3fea160bbeadc340fe1a9132/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3202c6f00145b8adb4632c1bb5071be5aa362829054653bac058dbcdbc6228e7", size = 484954, upload-time = "2025-10-13T06:33:16.697Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/bd/10126c9f59fb4f8fa51bf3f0ad17895b953bd09e1687986d5d9e110758c8/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17ae27e895f0ed960e28e76028c84758ff00df24e598654df3b5f22de8c7fc30", size = 366874, upload-time = "2025-10-13T06:33:26.888Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/fa/f12a1eb9c22cdce962bafebefea58e898c19bae3d21e9b79d6e811a2951d/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a36c3d55b913c09dc31572ca7d5b423e85d761f1b3c9d8f86e2a1433a2f20d5", size = 342990, upload-time = "2025-10-13T06:33:35.478Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/1d/2bd1c8900d7a081a61a1c424785fd1a1452def751bc212630251423d80ce/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:94875611a2a953c06e8152b1d485f4d20ec61b72ebd848f96c04aca66b30f189", size = 362603, upload-time = "2025-10-13T06:33:07.507Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/3a/9c176a7f9b93d78168b3d137db4704a703cb51b32edb29d231b615130b47/sqlglotrs-0.7.3-cp312-cp312-win32.whl", hash = "sha256:64a708c87d1bea1f4bdb3edf0a3e94ea1b908ed229502da93621a4a50ef20189", size = 183180, upload-time = "2025-10-13T06:34:01.017Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/ea/37757060d3caadb22509d349087e1e16a2dcc4c1649a00d2d6b501f8ff50/sqlglotrs-0.7.3-cp312-cp312-win_amd64.whl", hash = "sha256:fe1ef1bedbb34b87dfb4e99a911026f8895ff2514b222cfd82cd08033406de2e", size = 195746, upload-time = "2025-10-13T06:34:09.478Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/88/7fc59c146118603e06abf69dc19c237ef496a8dd936e5c224fdffc7df120/sqlglotrs-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3db8f75b8efe5b94ed5540c13b80ef0a3e64c0d15864b05a6bccf5554c6e6008", size = 318097, upload-time = "2025-12-02T16:58:30.763Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/9a/7c0103f02b371f49f6ade420519d54c11c7e3ae4dcf22a855b9c71ccb546/sqlglotrs-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37d00b69814fdabd4256be955d66e699afa1c50740f03369503d85f90245af35", size = 306820, upload-time = "2025-12-02T16:58:23.714Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/cf/52de2a02a52976dfbd863ec57a3fafaf018a9536114f195404d51717501d/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:631da494550442ec2c7139993f59d854e4d4a44282b568594b5fc50818bc4736", size = 341540, upload-time = "2025-12-02T16:57:33.009Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/89/072a295c3b98322a3d08d85ed47551c1f080309f2cde2d2fa75bd1964621/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b624e0650067cc006d8a0595e07be3ac91599187ee353313eb9f114ca434e44", size = 350048, upload-time = "2025-12-02T16:57:41.477Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/b2/fbc05eef045124a9e5820812ddd641ec42add5e52f12126a85d942b0f166/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c0c5ae335b1917aa101d7cfe1aacbedf3b54f489d2038e94c8f42ffe5bd304a", size = 474032, upload-time = "2025-12-02T16:58:00.344Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/a8/1472a5d5f849803fb2ad566ae43db8e5c9f3b1686b104dda245e4acfd963/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21d145e9fef6e2e53fdf17f9b6ab7e7fbba26064365c56d2103a41e95053d1d4", size = 365233, upload-time = "2025-12-02T16:58:08.102Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/c8/ea700f277cba380c7919136a16e03f9f990f29da34c5404b861fbb8b6fd5/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ed5d7afd8b6b244c33316cc292122f26c20bf9677907bc5790c1b053097aff4", size = 348452, upload-time = "2025-12-02T16:58:15.863Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/f7/ba63c7cabcd71abed855e7a4cecb4b0df297bf17d315ff39eacf94926378/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:185442ad85a125719bf365a238c2b357c079cb5a13392adbbde172b1a0073410", size = 371656, upload-time = "2025-12-02T16:57:51.329Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/dc/1ba05670afe7f4c7e651f972f4738dc4508525bb67b9151cdf463b0ef55b/sqlglotrs-0.8.0-cp310-cp310-win32.whl", hash = "sha256:a7d3f36d9c53090842ae18de6d96bd7634d73584255014983aad998f2b7dc95f", size = 188554, upload-time = "2025-12-02T16:58:39.078Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/fc/a393a837a9e09411da87cf8ee2d9f190e3bad37d289cd385e3791356a788/sqlglotrs-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:c8a5e3c8870323666e9695be7cc65f710ed437ceea572e69e2b14e63b70f21b2", size = 200973, upload-time = "2025-12-02T16:58:46.02Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/be/a6a8e41e59813663baf02b23534d822b62521d018ee740f132b4547c4239/sqlglotrs-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0267b0121073669d1184bc0441779559e6b0c6067a12571b63befa2a9b4b0f77", size = 318016, upload-time = "2025-12-02T16:58:32.555Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/02/bf65a608b2caf268d81073171196f93beed8d32731ebda1288153dec2b73/sqlglotrs-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c1a2fa22a3ae4b38c7df9abbf14b2473f7e71c859c95bc270bd4a169688380", size = 306527, upload-time = "2025-12-02T16:58:24.853Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/98/32de2ad5ea9310e220baabfb6b2ee1e3c7ebb3b83a1db9bd2acdf72de6a5/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7df3d2117c92004aa20082d71fbbd1735f063f123354d32d0b2b602ab4e1353", size = 341821, upload-time = "2025-12-02T16:57:34.854Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/99/64247cb3b9f99ca09aafa11791fe250326d498b194795af91cc957003852/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ecd7fdfd1be44828a8a8046ee743ffbaf93a972d7a125ff13e4673bb659fcf2c", size = 350003, upload-time = "2025-12-02T16:57:42.659Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/91/bc15e4d2322cc28f4f94e519b2ae927ba42844830efaacf973ff774d8e06/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:171df6454f3dc064b89895c51cfb713163188493b36b845bf7c17df0e5702095", size = 474163, upload-time = "2025-12-02T16:58:01.554Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/8e/736451fc39f68f1e394a90d768dd9c8135412669ea3460e47033308cbb2e/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:497472ed07445a693e2699fd6f1b8ed5b8320488ade6a4a8e476664ee93ea51c", size = 365088, upload-time = "2025-12-02T16:58:09.604Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/2c/214f352fe03652b08873dcb8f4e6799a02be71446bdf9fea99ce13a502f3/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2be9add4daed501e28564208b30d4a772dfd6aaa1ad10dadd2d49f4e851f9fa", size = 348368, upload-time = "2025-12-02T16:58:17.363Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/22/c445428a52d053a6f6b31858ac817afb997316e9f0ab2ee3187a10bd85a4/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:871d5ee6414f2d7116b670d0430c16f5b3d5a96480c274f7f3d50d97dbea7601", size = 371720, upload-time = "2025-12-02T16:57:52.71Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/b2/301261db4ac543891f897b58a036e87ff33158ea4eda050ee0e08ae0083a/sqlglotrs-0.8.0-cp311-cp311-win32.whl", hash = "sha256:1bbe94effd9d64a8bdca12e0f14b28388059cb5a381561bac07aafedc8c63761", size = 188284, upload-time = "2025-12-02T16:58:40.21Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/a1/0534075d3b8a7c8ab8eff4ea7ba0338a2ef76e3d2e49105b189049430e99/sqlglotrs-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:05a5098ec2836799c4c43b06df7c68a2b4c19c0fce042a66706fe3edc957459d", size = 201117, upload-time = "2025-12-02T16:58:47.14Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/20/7beddfd545aaebbfee10a77ac8ef8a205ff597f9ce041c4b0437d0194392/sqlglotrs-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fcb53f27cf4b9cae8a66c5777b84eeb3d079e96bcb4277b627fd90bfd1a591b5", size = 314699, upload-time = "2025-12-02T16:58:33.82Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/6f/6223a1946fe24a979b8af3c7ae2d16c5451d8f35f2468782bd4af2c122da/sqlglotrs-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4da1480cc288e02bd459e4638f212fa86a1fef81eb2cd69e6fdbdeb64e3df729", size = 303385, upload-time = "2025-12-02T16:58:26.052Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/98/55050208ef839cad740df6ca86f2f3ca895d469f6ce2040cba32d0b6c4a0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4a77df178b0ba242aba0e7cd775c3f9aef0fa79dfc31c6e642431ce690f51f", size = 341580, upload-time = "2025-12-02T16:57:36.197Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/f2/6f1d207e629fd4810cc826cf419acc386f3d43d32987684730fbc2399503/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a8647d20cc5a9ff39071786169b3f1acf56f266483fa55386111783bca335f04", size = 348451, upload-time = "2025-12-02T16:57:43.756Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/1b/fa8a0907471fe7be3754bac683a21c984b17672eef6958206473f683b63a/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1afdd6a0fa915b3aef7c801cbdc815bb39b3d6aecc4d5b04c4ce54d3f73d0013", size = 475703, upload-time = "2025-12-02T16:58:02.843Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/56/f020c9c48d68883f6e24d69d18fe386eafc5963bc3982cc45013ec9b1ba0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b4c1edeb80f572cf3586b9a23d15f18f48ac8dc481eceabdbb85dc7dbf8a2ce", size = 365842, upload-time = "2025-12-02T16:58:10.847Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/7b/091464f8aa2232a2f33028f9c9a2cbea7c4e5719400656f203592d46264d/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b6d819f2753804d55b10e4320df08350cd2739556572a97ed1b1d7fc939f194", size = 348397, upload-time = "2025-12-02T16:58:18.567Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/1b/1b0cf0d41e8412786d1e80695778db799520223acf85c3ddc53c1200731f/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dcf2cce002969cefb1466f2837c716d20fc9eac62b05043523fda25b3de4c444", size = 369756, upload-time = "2025-12-02T16:57:53.85Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/6e/d9e50472aa92736751abf3d6fcad1c793f0701f17a553ae787e4a7581a1d/sqlglotrs-0.8.0-cp312-cp312-win32.whl", hash = "sha256:5459235a25b30eae508bcaea8bc6ebc04610acd87e985ba4d602981a94078384", size = 187891, upload-time = "2025-12-02T16:58:41.57Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/a2/21d09ff2065a7e883f8f68dcea57fb23f6f04ba7a193f2ac2895b5dfafae/sqlglotrs-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:1e0de4fa8e6c54419bd63a1205f3218feb5e2649d72f1bc69c5261b6c333e63b", size = 200842, upload-time = "2025-12-02T16:58:48.181Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -10,6 +10,10 @@ interface DocPreviewerProps {
|
|||
url: string;
|
||||
}
|
||||
|
||||
// Word document preview component. Behavior:
|
||||
// 1) Fetches the document as a Blob.
|
||||
// 2) Detects .docx input via a ZIP header probe.
|
||||
// 3) Renders .docx using Mammoth; presents a controlled "unsupported" notice for non-ZIP payloads.
|
||||
export const DocPreviewer: React.FC<DocPreviewerProps> = ({
|
||||
className,
|
||||
url,
|
||||
|
|
@ -17,6 +21,33 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
|
|||
const [htmlContent, setHtmlContent] = useState<string>('');
|
||||
const [loading, setLoading] = useState(false);
|
||||
|
||||
// Determines whether the Blob represents a .docx document by checking for the ZIP
|
||||
// file signature ("PK") in the initial bytes. A valid .docx file is a ZIP container
|
||||
// and always begins with:
|
||||
// 50 4B 03 04 ("PK..")
|
||||
//
|
||||
// Legacy .doc files use the CFBF binary format, commonly starting with:
|
||||
// D0 CF 11 E0 A1 B1 1A E1
|
||||
//
|
||||
// Note that some files distributed with a “.doc” extension may internally be .docx
|
||||
// documents (e.g., renamed files or files produced by systems that export .docx
|
||||
// content under a .doc filename). These files will still present the ZIP signature
|
||||
// and are therefore treated as supported .docx payloads. The header inspection
|
||||
// ensures correct routing regardless of filename or reported extension.
|
||||
// Resolves to true when the blob's first two bytes are the ZIP magic "PK"
// (0x50 0x4B). Any failure while reading the header is logged and reported
// as "not ZIP" rather than thrown.
const isZipLikeBlob = async (blob: Blob): Promise<boolean> => {
  let header: Uint8Array;

  try {
    // Only the leading bytes are read; the rest of the payload is untouched.
    header = new Uint8Array(await blob.slice(0, 4).arrayBuffer());
  } catch (e) {
    console.error('Failed to inspect blob header', e);
    return false;
  }

  if (header.length < 2) {
    return false;
  }

  return header[0] === 0x50 && header[1] === 0x4b;
};
|
||||
|
||||
const fetchDocument = async () => {
|
||||
if (!url) return;
|
||||
|
||||
|
|
@ -36,24 +67,21 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
|
|||
const contentType: string =
|
||||
blob.type || (res as any).headers?.['content-type'] || '';
|
||||
|
||||
// ---- Detect legacy .doc via MIME or URL ----
|
||||
const cleanUrl = url.split(/[?#]/)[0].toLowerCase();
|
||||
const isDocMime = /application\/msword/i.test(contentType);
|
||||
const isLegacyDocByUrl =
|
||||
cleanUrl.endsWith('.doc') && !cleanUrl.endsWith('.docx');
|
||||
const isLegacyDoc = isDocMime || isLegacyDocByUrl;
|
||||
// Execution path selection: ZIP-like payloads are treated as .docx and rendered via Mammoth;
|
||||
// non-ZIP payloads receive an explicit unsupported notice.
|
||||
const looksLikeZip = await isZipLikeBlob(blob);
|
||||
|
||||
if (isLegacyDoc) {
|
||||
// Do not call mammoth and do not throw an error; instead, show a note in the preview area
|
||||
if (!looksLikeZip) {
|
||||
// Non-ZIP payload (likely legacy .doc or another format): skip Mammoth processing.
|
||||
setHtmlContent(`
|
||||
<div class="flex h-full items-center justify-center">
|
||||
<div class="border border-dashed border-border-normal rounded-xl p-8 max-w-2xl text-center">
|
||||
<p class="text-2xl font-bold mb-4">
|
||||
Preview not available for .doc files
|
||||
Preview is not available for this Word document
|
||||
</p>
|
||||
<p class="italic text-sm text-muted-foreground leading-relaxed">
|
||||
Mammoth does not support <code>.doc</code> documents.<br/>
|
||||
Inline preview is unavailable.
|
||||
Mammoth supports modern <code>.docx</code> files only.<br/>
|
||||
The file header does not indicate a <code>.docx</code> ZIP archive.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -61,7 +89,7 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
|
|||
return;
|
||||
}
|
||||
|
||||
// ---- Standard .docx preview path ----
|
||||
// ZIP-like payload: parse as .docx with Mammoth
|
||||
const arrayBuffer = await blob.arrayBuffer();
|
||||
const result = await mammoth.convertToHtml(
|
||||
{ arrayBuffer },
|
||||
|
|
@ -74,8 +102,7 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
|
|||
|
||||
setHtmlContent(styledContent);
|
||||
} catch (err) {
|
||||
// Only errors from the mammoth conversion path should surface here
|
||||
message.error('Document parsing failed');
|
||||
message.error('Failed to parse document.');
|
||||
console.error('Error parsing document:', err);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import React, { useEffect, useRef } from 'react';
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import './css/cloud9_night.less';
|
||||
import './css/index.less';
|
||||
import { JsonEditorOptions, JsonEditorProps } from './interface';
|
||||
|
||||
const defaultConfig: JsonEditorOptions = {
|
||||
mode: 'code',
|
||||
modes: ['tree', 'code'],
|
||||
|
|
@ -14,6 +15,7 @@ const defaultConfig: JsonEditorOptions = {
|
|||
enableTransform: false,
|
||||
indentation: 2,
|
||||
};
|
||||
|
||||
const JsonEditor: React.FC<JsonEditorProps> = ({
|
||||
value,
|
||||
onChange,
|
||||
|
|
@ -25,43 +27,62 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
|
|||
const editorRef = useRef<any>(null);
|
||||
const { i18n } = useTranslation();
|
||||
const currentLanguageRef = useRef<string>(i18n.language);
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
|
||||
useEffect(() => {
|
||||
if (typeof window !== 'undefined') {
|
||||
const JSONEditor = require('jsoneditor');
|
||||
import('jsoneditor/dist/jsoneditor.min.css');
|
||||
let isMounted = true;
|
||||
|
||||
if (containerRef.current) {
|
||||
// Default configuration options
|
||||
const defaultOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
console.error(err);
|
||||
}
|
||||
const initEditor = async () => {
|
||||
if (typeof window !== 'undefined') {
|
||||
try {
|
||||
const JSONEditorModule = await import('jsoneditor');
|
||||
const JSONEditor = JSONEditorModule.default || JSONEditorModule;
|
||||
|
||||
await import('jsoneditor/dist/jsoneditor.min.css');
|
||||
|
||||
if (isMounted && containerRef.current) {
|
||||
// Default configuration options
|
||||
const defaultOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
console.error(err);
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(
|
||||
containerRef.current,
|
||||
defaultOptions,
|
||||
);
|
||||
|
||||
if (value) {
|
||||
editorRef.current.set(value);
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(
|
||||
containerRef.current,
|
||||
defaultOptions,
|
||||
);
|
||||
|
||||
if (value) {
|
||||
editorRef.current.set(value);
|
||||
setIsLoading(false);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to load jsoneditor:', error);
|
||||
if (isMounted) {
|
||||
setIsLoading(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
initEditor();
|
||||
|
||||
return () => {
|
||||
isMounted = false;
|
||||
if (editorRef.current) {
|
||||
if (typeof editorRef.current.destroy === 'function') {
|
||||
editorRef.current.destroy();
|
||||
|
|
@ -92,26 +113,38 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
|
|||
}
|
||||
|
||||
// Recreate the editor with new language
|
||||
const JSONEditor = require('jsoneditor');
|
||||
const initEditorWithNewLanguage = async () => {
|
||||
try {
|
||||
const JSONEditorModule = await import('jsoneditor');
|
||||
const JSONEditor = JSONEditorModule.default || JSONEditorModule;
|
||||
|
||||
const newOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
const newOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(containerRef.current, newOptions);
|
||||
editorRef.current.set(currentData);
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Failed to reload jsoneditor with new language:',
|
||||
error,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(containerRef.current, newOptions);
|
||||
editorRef.current.set(currentData);
|
||||
initEditorWithNewLanguage();
|
||||
}
|
||||
}, [i18n.language, value, onChange, options]);
|
||||
|
||||
|
|
@ -135,7 +168,13 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
|
|||
ref={containerRef}
|
||||
style={{ height }}
|
||||
className={`ace-tomorrow-night w-full border border-border-button rounded-lg overflow-hidden bg-bg-input ${className} `}
|
||||
/>
|
||||
>
|
||||
{isLoading && (
|
||||
<div className="flex items-center justify-center h-full">
|
||||
<div className="text-text-secondary">Loading editor...</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -217,20 +217,23 @@ const MarkdownContent = ({
|
|||
const docType = chunkItem?.doc_type;
|
||||
|
||||
return showImage(docType) ? (
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<section>
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<span className="text-accent-primary"> {imageId}</span>
|
||||
</section>
|
||||
) : (
|
||||
<HoverCard key={i}>
|
||||
<HoverCardTrigger>
|
||||
|
|
|
|||
|
|
@ -220,20 +220,23 @@ function MarkdownContent({
|
|||
const docType = chunkItem?.doc_type;
|
||||
|
||||
return showImage(docType) ? (
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<section>
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<span className="text-accent-primary">{imageId}</span>
|
||||
</section>
|
||||
) : (
|
||||
<HoverCard key={i}>
|
||||
<HoverCardTrigger>
|
||||
|
|
|
|||
6
web/src/custom.d.ts
vendored
6
web/src/custom.d.ts
vendored
|
|
@ -2,3 +2,9 @@ declare module '*.md' {
|
|||
const content: string;
|
||||
export default content;
|
||||
}
|
||||
|
||||
// Loose ambient typing for the `jsoneditor` package: the module's default
// export is typed as `any`.
//
// Only a default export is declared. TypeScript does not allow `export =`
// to be combined with any other export in the same module (error TS2309),
// so the two forms cannot coexist in one declaration.
declare module 'jsoneditor' {
  const JSONEditor: any;
  export default JSONEditor;
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import { useDropdownManager } from './context';
|
|||
|
||||
import { AgentBackground } from '@/components/canvas/background';
|
||||
import Spotlight from '@/components/spotlight';
|
||||
import { useNodeLoading } from '../hooks/use-node-loading';
|
||||
import {
|
||||
useHideFormSheetOnNodeDeletion,
|
||||
useShowDrawer,
|
||||
|
|
@ -166,6 +167,8 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
|||
});
|
||||
const [lastSendLoading, setLastSendLoading] = useState(false);
|
||||
|
||||
const [currentSendLoading, setCurrentSendLoading] = useState(false);
|
||||
|
||||
const { handleBeforeDelete } = useBeforeDelete();
|
||||
|
||||
const { addCanvasNode, addNoteNode } = useAddNode(reactFlowInstance);
|
||||
|
|
@ -182,6 +185,7 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
|||
}, [chatVisible, clearEventList, currentTaskId, stopMessage]);
|
||||
|
||||
const setLastSendLoadingFunc = (loading: boolean, messageId: string) => {
|
||||
setCurrentSendLoading(!!loading);
|
||||
if (messageId === currentMessageId) {
|
||||
setLastSendLoading(loading);
|
||||
} else {
|
||||
|
|
@ -249,7 +253,10 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
|||
clearActiveDropdown,
|
||||
removePlaceholderNode,
|
||||
]);
|
||||
|
||||
const { lastNode, setDerivedMessages, startButNotFinishedNodeIds } =
|
||||
useNodeLoading({
|
||||
currentEventListWithoutMessageById,
|
||||
});
|
||||
return (
|
||||
<div className={cn(styles.canvasWrapper, 'px-5 pb-5')}>
|
||||
<svg
|
||||
|
|
@ -285,7 +292,15 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
|||
</marker>
|
||||
</defs>
|
||||
</svg>
|
||||
<AgentInstanceContext.Provider value={{ addCanvasNode, showFormDrawer }}>
|
||||
<AgentInstanceContext.Provider
|
||||
value={{
|
||||
addCanvasNode,
|
||||
showFormDrawer,
|
||||
lastNode,
|
||||
currentSendLoading,
|
||||
startButNotFinishedNodeIds,
|
||||
}}
|
||||
>
|
||||
<ReactFlow
|
||||
connectionMode={ConnectionMode.Loose}
|
||||
nodes={nodes}
|
||||
|
|
@ -380,9 +395,10 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
|||
></FormSheet>
|
||||
</AgentInstanceContext.Provider>
|
||||
)}
|
||||
|
||||
{chatVisible && (
|
||||
<AgentChatContext.Provider
|
||||
value={{ showLogSheet, setLastSendLoadingFunc }}
|
||||
value={{ showLogSheet, setLastSendLoadingFunc, setDerivedMessages }}
|
||||
>
|
||||
<AgentChatLogContext.Provider
|
||||
value={{ addEventList, setCurrentMessageId }}
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ function InnerAgentNode({
|
|||
|
||||
return (
|
||||
<ToolBar selected={selected} id={id} label={data.label}>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
{isHeadAgent && (
|
||||
<>
|
||||
<LeftEndHandle></LeftEndHandle>
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ function InnerBeginNode({ data, id, selected }: NodeProps<IBeginNode>) {
|
|||
const inputs: Record<string, BeginQuery> = get(data, 'form.inputs', {});
|
||||
|
||||
return (
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<CommonHandle
|
||||
type="source"
|
||||
position={Position.Right}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ export function InnerCategorizeNode({
|
|||
const { positions } = useBuildCategorizeHandlePositions({ data, id });
|
||||
return (
|
||||
<ToolBar selected={selected} id={id} label={data.label}>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<LeftEndHandle></LeftEndHandle>
|
||||
|
||||
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ export function ExitLoopNode({ id, data, selected }: NodeProps<BaseNode<any>>) {
|
|||
showRun={false}
|
||||
showCopy={false}
|
||||
>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<LeftEndHandle></LeftEndHandle>
|
||||
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
|
||||
</NodeWrapper>
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ function InnerFileNode({ data, id, selected }: NodeProps<IBeginNode>) {
|
|||
const inputs: Record<string, BeginQuery> = get(data, 'form.inputs', {});
|
||||
|
||||
return (
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<CommonHandle
|
||||
type="source"
|
||||
position={Position.Right}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ function InnerRagNode({
|
|||
showRun={needsSingleStepDebugging(data.label)}
|
||||
showCopy={showCopyIcon(data.label)}
|
||||
>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<LeftEndHandle></LeftEndHandle>
|
||||
<CommonHandle
|
||||
type="source"
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ function InnerMessageNode({ id, data, selected }: NodeProps<IMessageNode>) {
|
|||
const messages: string[] = get(data, 'form.content', []);
|
||||
return (
|
||||
<ToolBar selected={selected} id={id} label={data.label}>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<LeftEndHandle></LeftEndHandle>
|
||||
<NodeHeader
|
||||
id={id}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
import { cn } from '@/lib/utils';
|
||||
import { HTMLAttributes } from 'react';
|
||||
import { Loader } from 'lucide-react';
|
||||
import { HTMLAttributes, useContext } from 'react';
|
||||
import { AgentInstanceContext } from '../../context';
|
||||
|
||||
type IProps = HTMLAttributes<HTMLDivElement> & { selected?: boolean };
|
||||
|
||||
export function NodeWrapper({ children, className, selected }: IProps) {
|
||||
export function NodeWrapper({ children, className, selected, id }: IProps) {
|
||||
const { currentSendLoading, startButNotFinishedNodeIds = [] } =
|
||||
useContext(AgentInstanceContext);
|
||||
return (
|
||||
<section
|
||||
className={cn(
|
||||
|
|
@ -12,6 +16,13 @@ export function NodeWrapper({ children, className, selected }: IProps) {
|
|||
className,
|
||||
)}
|
||||
>
|
||||
{id &&
|
||||
startButNotFinishedNodeIds.indexOf(id as string) > -1 &&
|
||||
currentSendLoading && (
|
||||
<div className=" absolute right-0 left-0 top-0 flex items-start justify-end p-2">
|
||||
<Loader size={12} className=" animate-spin" />
|
||||
</div>
|
||||
)}
|
||||
{children}
|
||||
</section>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ function ParserNode({
|
|||
}: NodeProps<BaseNode<ParserFormSchemaType>>) {
|
||||
const { t } = useTranslation();
|
||||
return (
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<CommonHandle
|
||||
id={NodeHandleId.End}
|
||||
type="target"
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ function InnerRetrievalNode({
|
|||
|
||||
return (
|
||||
<ToolBar selected={selected} id={id} label={data.label}>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<LeftEndHandle></LeftEndHandle>
|
||||
<CommonHandle
|
||||
id={NodeHandleId.Start}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ function InnerSplitterNode({
|
|||
showCopy={false}
|
||||
showRun={false}
|
||||
>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<CommonHandle
|
||||
id={NodeHandleId.End}
|
||||
type="target"
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ function InnerSwitchNode({ id, data, selected }: NodeProps<ISwitchNode>) {
|
|||
const { positions } = useBuildSwitchHandlePositions({ data, id });
|
||||
return (
|
||||
<ToolBar selected={selected} id={id} label={data.label} showRun={false}>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<LeftEndHandle></LeftEndHandle>
|
||||
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
|
||||
<section className="gap-2.5 flex flex-col">
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ function TokenizerNode({
|
|||
showRun={false}
|
||||
showCopy={false}
|
||||
>
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<CommonHandle
|
||||
id={NodeHandleId.End}
|
||||
type="target"
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ function InnerToolNode({
|
|||
);
|
||||
|
||||
return (
|
||||
<NodeWrapper selected={selected}>
|
||||
<NodeWrapper selected={selected} id={id}>
|
||||
<Handle
|
||||
id={NodeHandleId.End}
|
||||
type="target"
|
||||
|
|
|
|||
|
|
@ -13,8 +13,9 @@ import {
|
|||
} from '@/hooks/use-agent-request';
|
||||
import { useFetchUserInfo } from '@/hooks/use-user-setting-request';
|
||||
import { buildMessageUuidWithRole } from '@/utils/chat';
|
||||
import { memo, useCallback } from 'react';
|
||||
import { memo, useCallback, useContext } from 'react';
|
||||
import { useParams } from 'umi';
|
||||
import { AgentChatContext } from '../context';
|
||||
import DebugContent from '../debug-content';
|
||||
import { useAwaitCompentData } from '../hooks/use-chat-logic';
|
||||
import { useIsTaskMode } from '../hooks/use-get-begin-query';
|
||||
|
|
@ -49,6 +50,9 @@ function AgentChatBox() {
|
|||
canvasId: canvasId as string,
|
||||
});
|
||||
|
||||
const { setDerivedMessages } = useContext(AgentChatContext);
|
||||
setDerivedMessages?.(derivedMessages);
|
||||
|
||||
const isTaskMode = useIsTaskMode();
|
||||
|
||||
const handleUploadFile: NonNullable<FileUploadProps['onUpload']> =
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import { INodeEvent } from '@/hooks/use-send-message';
|
||||
import { IMessage } from '@/interfaces/database/chat';
|
||||
import { RAGFlowNodeType } from '@/interfaces/database/flow';
|
||||
import { HandleType, Position } from '@xyflow/react';
|
||||
import { createContext } from 'react';
|
||||
import { Dispatch, SetStateAction, createContext } from 'react';
|
||||
import { useAddNode } from './hooks/use-add-node';
|
||||
import { useCacheChatLog } from './hooks/use-cache-chat-log';
|
||||
import { useShowFormDrawer, useShowLogSheet } from './hooks/use-show-drawer';
|
||||
|
|
@ -13,7 +15,11 @@ type AgentInstanceContextType = Pick<
|
|||
ReturnType<typeof useAddNode>,
|
||||
'addCanvasNode'
|
||||
> &
|
||||
Pick<ReturnType<typeof useShowFormDrawer>, 'showFormDrawer'>;
|
||||
Pick<ReturnType<typeof useShowFormDrawer>, 'showFormDrawer'> & {
|
||||
lastNode: INodeEvent | null;
|
||||
currentSendLoading: boolean;
|
||||
startButNotFinishedNodeIds: string[];
|
||||
};
|
||||
|
||||
export const AgentInstanceContext = createContext<AgentInstanceContextType>(
|
||||
{} as AgentInstanceContextType,
|
||||
|
|
@ -22,7 +28,10 @@ export const AgentInstanceContext = createContext<AgentInstanceContextType>(
|
|||
type AgentChatContextType = Pick<
|
||||
ReturnType<typeof useShowLogSheet>,
|
||||
'showLogSheet'
|
||||
> & { setLastSendLoadingFunc: (loading: boolean, messageId: string) => void };
|
||||
> & {
|
||||
setLastSendLoadingFunc: (loading: boolean, messageId: string) => void;
|
||||
setDerivedMessages: Dispatch<SetStateAction<IMessage[] | undefined>>;
|
||||
};
|
||||
|
||||
export const AgentChatContext = createContext<AgentChatContextType>(
|
||||
{} as AgentChatContextType,
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ const FormSheet = ({
|
|||
<Sheet open={visible} modal={false}>
|
||||
<SheetContent
|
||||
className={cn('top-20 p-0 flex flex-col pb-20', {
|
||||
'right-[620px]': chatVisible,
|
||||
'right-[clamp(0px,34%,620px)]': chatVisible,
|
||||
})}
|
||||
closeIcon={false}
|
||||
>
|
||||
|
|
|
|||
88
web/src/pages/agent/hooks/use-node-loading.ts
Normal file
88
web/src/pages/agent/hooks/use-node-loading.ts
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
import {
|
||||
INodeData,
|
||||
INodeEvent,
|
||||
MessageEventType,
|
||||
} from '@/hooks/use-send-message';
|
||||
import { IMessage } from '@/interfaces/database/chat';
|
||||
import { useCallback, useMemo, useState } from 'react';
|
||||
|
||||
/**
 * Derives node-level progress from the events attached to the newest message.
 *
 * The caller pushes the current message list in through `setDerivedMessages`;
 * the hook then looks up the events for the last message's id and works out
 * which nodes have emitted a NodeStarted event without a matching
 * NodeFinished event.
 *
 * @param currentEventListWithoutMessageById Returns the node events recorded
 *   for a given message id.
 * @returns
 *   - `lastNode`: the most recently started node event (first occurrence per
 *     component id), or `null` when nothing has started.
 *   - `startButNotFinishedNodeIds`: component ids that started but have not
 *     finished, in start order.
 *   - `filterFinishedNodeList`: returns the `data` payload of every
 *     NodeFinished event for the newest message.
 *   - `setDerivedMessages`: state setter for the message list.
 */
export const useNodeLoading = ({
  currentEventListWithoutMessageById,
}: {
  currentEventListWithoutMessageById: (messageId: string) => INodeEvent[];
}) => {
  const [derivedMessages, setDerivedMessages] = useState<IMessage[]>();

  // Id of the newest message; undefined until a message list is supplied.
  const lastMessageId = useMemo(
    () => derivedMessages?.[derivedMessages.length - 1]?.id,
    [derivedMessages],
  );

  const currentEventListWithoutMessage = useMemo(
    () =>
      lastMessageId ? currentEventListWithoutMessageById(lastMessageId) : [],
    [currentEventListWithoutMessageById, lastMessageId],
  );

  // NodeStarted events, keeping only the first event per component id.
  const startedNodeList = useMemo(() => {
    const seenComponentIds = new Set<string>();

    return currentEventListWithoutMessage.filter((x) => {
      if (x.event !== MessageEventType.NodeStarted) {
        return false;
      }
      if (seenComponentIds.has(x.data.component_id)) {
        return false;
      }
      seenComponentIds.add(x.data.component_id);
      return true;
    });
  }, [currentEventListWithoutMessage]);

  const filterFinishedNodeList = useCallback(
    () =>
      currentEventListWithoutMessage
        .filter((x) => x.event === MessageEventType.NodeFinished)
        .map((x) => x.data),
    [currentEventListWithoutMessage],
  );

  // Normalised to null so an empty list never leaks `undefined` to consumers
  // that expect `INodeEvent | null`.
  const lastNode = useMemo(
    () => startedNodeList[startedNodeList.length - 1] ?? null,
    [startedNodeList],
  );

  const startNodeIds = useMemo(
    () => startedNodeList.map((x) => x.data.component_id),
    [startedNodeList],
  );

  const finishNodeIds = useMemo(() => {
    // Nothing has started, so there is nothing to match finishes against.
    if (!lastNode) {
      return [];
    }
    const finishedIds = filterFinishedNodeList().map(
      (x: INodeData) => x.component_id,
    );
    return Array.from(new Set(finishedIds));
  }, [lastNode, filterFinishedNodeList]);

  const startButNotFinishedNodeIds = useMemo(() => {
    const finished = new Set(finishNodeIds);
    return startNodeIds.filter((x) => !finished.has(x));
  }, [finishNodeIds, startNodeIds]);

  return {
    lastNode,
    startButNotFinishedNodeIds,
    filterFinishedNodeList,
    setDerivedMessages,
  };
};
|
||||
|
|
@ -26,7 +26,7 @@ export function LogSheet({
|
|||
return (
|
||||
<Sheet open onOpenChange={hideModal} modal={false}>
|
||||
<SheetContent
|
||||
className={cn('top-20 right-[620px]')}
|
||||
className={cn('top-20 right-[clamp(0px,34%,620px)]')}
|
||||
onInteractOutside={(e) => e.preventDefault()}
|
||||
>
|
||||
<SheetHeader>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,191 @@
|
|||
import { useEffect, useMemo } from 'react';
|
||||
import { ControllerRenderProps, useFormContext } from 'react-hook-form';
|
||||
|
||||
import { Checkbox } from '@/components/ui/checkbox';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { cn } from '@/lib/utils';
|
||||
|
||||
/* ---------------- Token Field ---------------- */
|
||||
|
||||
export type ConfluenceTokenFieldProps = ControllerRenderProps & {
|
||||
fieldType: 'username' | 'token';
|
||||
placeholder?: string;
|
||||
disabled?: boolean;
|
||||
};
|
||||
|
||||
// Single-line input for a Confluence credential. `fieldType` selects between
// a masked token input and a plain-text username input, and also picks the
// placeholder shown when the caller does not pass one.
const ConfluenceTokenField = (props: ConfluenceTokenFieldProps) => {
  const { fieldType, value, onChange, placeholder, disabled, ...rest } = props;

  const isToken = fieldType === 'token';
  const fallbackPlaceholder = isToken
    ? 'Enter your Confluence access token'
    : 'Confluence username or email';

  return (
    <div className="flex w-full flex-col gap-2">
      <Input
        className="w-full"
        type={isToken ? 'password' : 'text'}
        value={value ?? ''}
        onChange={(e) => onChange(e.target.value)}
        placeholder={placeholder || fallbackPlaceholder}
        disabled={disabled}
        {...rest}
      />
    </div>
  );
};
|
||||
|
||||
/* ---------------- Indexing Mode Field ---------------- */
|
||||
|
||||
type ConfluenceIndexingMode = 'everything' | 'space' | 'page';
|
||||
|
||||
export type ConfluenceIndexingModeFieldProps = ControllerRenderProps;
|
||||
|
||||
// Mode picker for Confluence indexing ("everything" / "space" / "page").
// The selected mode is the controlled field value; the dependent inputs
// (`config.space`, `config.page_id`, `config.index_recursively`) are read and
// written through the surrounding react-hook-form context.
export const ConfluenceIndexingModeField = (
  fieldProps: ConfluenceIndexingModeFieldProps,
) => {
  const { value, onChange, disabled } = fieldProps;
  const { watch, setValue } = useFormContext();

  // An unset field value is presented as "everything".
  const mode = useMemo<ConfluenceIndexingMode>(
    () => (value as ConfluenceIndexingMode) || 'everything',
    [value],
  );

  const spaceValue = watch('config.space');
  const pageIdValue = watch('config.page_id');
  const indexRecursively = watch('config.index_recursively');

  // Push the default into the form when the field has no value.
  useEffect(() => {
    if (value) {
      return;
    }
    onChange('everything');
  }, [value, onChange]);

  // Writes a sibling config field and marks it dirty and touched.
  const writeConfig = (path: string, next: string | boolean) =>
    setValue(path, next, { shouldDirty: true, shouldTouch: true });

  // Switching mode clears the inputs that do not belong to the new mode.
  const handleModeChange = (nextMode?: string) => {
    const normalized = (nextMode || 'everything') as ConfluenceIndexingMode;
    onChange(normalized);

    switch (normalized) {
      case 'everything':
        writeConfig('config.space', '');
        writeConfig('config.page_id', '');
        writeConfig('config.index_recursively', false);
        break;
      case 'space':
        writeConfig('config.page_id', '');
        writeConfig('config.index_recursively', false);
        break;
      case 'page':
        writeConfig('config.space', '');
        break;
      default:
        break;
    }
  };

  const modeButtons = INDEX_MODE_OPTIONS.map((option) => {
    const isActive = option.value === mode;
    return (
      <button
        key={option.value}
        type="button"
        disabled={disabled}
        onClick={() => handleModeChange(option.value)}
        className={cn(
          'flex-1 rounded-lg border px-3 py-2 transition-all',
          'border-transparent bg-transparent text-text-secondary hover:border-border-button hover:bg-bg-card-secondary',
          isActive &&
            'border-border-button bg-background text-primary shadow-sm',
        )}
      >
        {option.label}
      </button>
    );
  });

  return (
    <div className="w-full rounded-lg border border-border-button bg-bg-card p-4 space-y-4">
      <div className="flex items-center gap-2 text-sm font-medium text-text-secondary">
        {modeButtons}
      </div>

      {mode === 'everything' && (
        <p className="text-sm text-text-secondary">
          This connector will index all pages the provided credentials have
          access to.
        </p>
      )}

      {mode === 'space' && (
        <div className="space-y-2">
          <div className="text-sm font-semibold text-text-primary">
            Space Key
          </div>
          <Input
            className="w-full"
            value={spaceValue ?? ''}
            onChange={(e) => writeConfig('config.space', e.target.value)}
            placeholder="e.g. KB"
            disabled={disabled}
          />
          <p className="text-xs text-text-secondary">
            The Confluence space key to index.
          </p>
        </div>
      )}

      {mode === 'page' && (
        <div className="space-y-2">
          <div className="text-sm font-semibold text-text-primary">Page ID</div>
          <Input
            className="w-full"
            value={pageIdValue ?? ''}
            onChange={(e) => writeConfig('config.page_id', e.target.value)}
            placeholder="e.g. 123456"
            disabled={disabled}
          />
          <p className="text-xs text-text-secondary">
            The Confluence page ID to index.
          </p>

          <div className="flex items-center gap-2 pt-2">
            <Checkbox
              checked={Boolean(indexRecursively)}
              onCheckedChange={(checked) =>
                writeConfig('config.index_recursively', Boolean(checked))
              }
              disabled={disabled}
            />
            <span className="text-sm text-text-secondary">
              Index child pages recursively
            </span>
          </div>
        </div>
      )}
    </div>
  );
};
|
||||
|
||||
// Indexing modes offered by the mode picker, in display order.
// `label` is the button text; `value` is what gets written to the form field.
const INDEX_MODE_OPTIONS: Array<{ label: string; value: string }> = [
  { value: 'everything', label: 'Everything' },
  { value: 'space', label: 'Space' },
  { value: 'page', label: 'Page' },
];
|
||||
|
||||
export default ConfluenceTokenField;
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
import { FormFieldType } from '@/components/dynamic-form';
|
||||
import SvgIcon from '@/components/svg-icon';
|
||||
import { t } from 'i18next';
|
||||
import { ConfluenceIndexingModeField } from './component/confluence-token-field';
|
||||
import GmailTokenField from './component/gmail-token-field';
|
||||
import GoogleDriveTokenField from './component/google-drive-token-field';
|
||||
|
||||
export enum DataSourceKey {
|
||||
CONFLUENCE = 'confluence',
|
||||
S3 = 's3',
|
||||
|
|
@ -230,12 +230,35 @@ export const DataSourceFormFields = {
|
|||
required: false,
|
||||
tooltip: t('setting.confluenceIsCloudTip'),
|
||||
},
|
||||
{
|
||||
label: 'Index Method',
|
||||
name: 'config.index_mode',
|
||||
type: FormFieldType.Text, // keep as text so RHF registers it
|
||||
required: false,
|
||||
horizontal: true,
|
||||
labelClassName: 'self-start pt-4',
|
||||
render: (fieldProps) => <ConfluenceIndexingModeField {...fieldProps} />,
|
||||
},
|
||||
{
|
||||
label: 'Space Key',
|
||||
name: 'config.space',
|
||||
type: FormFieldType.Text,
|
||||
required: false,
|
||||
tooltip: t('setting.confluenceSpaceKeyTip'),
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
label: 'Page ID',
|
||||
name: 'config.page_id',
|
||||
type: FormFieldType.Text,
|
||||
required: false,
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
label: 'Index Recursively',
|
||||
name: 'config.index_recursively',
|
||||
type: FormFieldType.Checkbox,
|
||||
required: false,
|
||||
hidden: true,
|
||||
},
|
||||
],
|
||||
[DataSourceKey.GOOGLE_DRIVE]: [
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue