diff --git a/agent/component/base.py b/agent/component/base.py index 81d3fac56..321907dbe 100644 --- a/agent/component/base.py +++ b/agent/component/base.py @@ -393,7 +393,7 @@ class ComponentParamBase(ABC): class ComponentBase(ABC): component_name: str thread_limiter = asyncio.Semaphore(int(os.environ.get("MAX_CONCURRENT_CHATS", 10))) - variable_ref_patt = r"\{* *\{([a-zA-Z:0-9]+@[A-Za-z0-9_.]+|sys\.[A-Za-z0-9_.]+|env\.[A-Za-z0-9_.]+)\} *\}*" + variable_ref_patt = r"\{* *\{([a-zA-Z_:0-9]+@[A-Za-z0-9_.]+|sys\.[A-Za-z0-9_.]+|env\.[A-Za-z0-9_.]+)\} *\}*" def __str__(self): """ diff --git a/agent/component/message.py b/agent/component/message.py index b4e2985e0..e2428a084 100644 --- a/agent/component/message.py +++ b/agent/component/message.py @@ -14,6 +14,8 @@ # limitations under the License. # import asyncio +import nest_asyncio +nest_asyncio.apply() import inspect import json import os @@ -207,7 +209,7 @@ class Message(ComponentBase): import pypandoc doc_id = get_uuid() - if self._param.output_format.lower() not in {"markdown", "html", "pdf", "docx"}: + if self._param.output_format.lower() not in {"markdown", "html", "pdf", "docx", "xlsx"}: self._param.output_format = "markdown" try: @@ -227,6 +229,46 @@ class Message(ComponentBase): binary_content = converted.encode("utf-8") + elif self._param.output_format == "xlsx": + import pandas as pd + from io import BytesIO + + if isinstance(content, str): + try: + # Convert markdown to HTML tables to help pandas parse it + html_content = pypandoc.convert_text(content, to="html", format="markdown") + dfs = pd.read_html(html_content) + except Exception as e: + dfs = [] + + if not dfs: + df = pd.DataFrame({"Content": [content]}) + dfs = [df] + else: + # Should not accept file path for Excel generation from agent response usually, + # but if it does, read it as text + with open(content, "r") as f: + txt_content = f.read() + try: + html_content = pypandoc.convert_text(txt_content, to="html", format="markdown") + dfs = pd.read_html(html_content) + except Exception: + dfs = [] + + if not dfs: + df = pd.DataFrame({"Content": [txt_content]}) + dfs = [df] + + # Write to Excel + excel_io = BytesIO() + with pd.ExcelWriter(excel_io, engine='openpyxl') as writer: + for i, df in enumerate(dfs): + sheet_name = f"Sheet{i+1}" + df.to_excel(writer, sheet_name=sheet_name, index=False) + + excel_io.seek(0) + binary_content = excel_io.read() + else: # pdf, docx with tempfile.NamedTemporaryFile(suffix=f".{self._param.output_format}", delete=False) as tmp: tmp_name = tmp.name diff --git a/api/apps/canvas_app.py b/api/apps/canvas_app.py index ed8c8c7a0..64b0d0f55 100644 --- a/api/apps/canvas_app.py +++ b/api/apps/canvas_app.py @@ -14,6 +14,7 @@ # limitations under the License. # import asyncio +import inspect import json import logging from functools import partial @@ -299,8 +300,13 @@ async def debug(): for k in outputs.keys(): if isinstance(outputs[k], partial): txt = "" - for c in outputs[k](): - txt += c + iter_obj = outputs[k]() + if inspect.isasyncgen(iter_obj): + async for c in iter_obj: + txt += c + else: + for c in iter_obj: + txt += c outputs[k] = txt return get_json_result(data=outputs) except Exception as e: diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index 5c25b7fe0..8775c6288 100644 --- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -832,6 +832,7 @@ export enum ExportFileType { HTML = 'html', Markdown = 'md', DOCX = 'docx', + Excel = 'xlsx', } export enum TypesWithArray {