feat: Add markdown table parsing for XLSX output.
This commit is contained in:
parent
4cc984928c
commit
9ab745e1e9
1 changed file with 53 additions and 26 deletions
|
|
@@ -232,39 +232,66 @@ class Message(ComponentBase):
|
||||||
elif self._param.output_format == "xlsx":
|
elif self._param.output_format == "xlsx":
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from datetime import datetime
|
import re
|
||||||
|
|
||||||
# Get all conversation history from canvas
|
# Try to parse markdown table from the content
|
||||||
history = getattr(self._canvas, 'history', [])
|
df = None
|
||||||
|
|
||||||
# Build rows from conversation history
|
if isinstance(content, str):
|
||||||
rows = []
|
# Extract markdown table from content
|
||||||
for role, msg in history:
|
# Pattern: lines starting with | that contain at least one more |
|
||||||
if isinstance(msg, dict):
|
lines = content.strip().split('\n')
|
||||||
msg_content = msg.get("content", str(msg))
|
table_lines = []
|
||||||
else:
|
in_table = False
|
||||||
msg_content = str(msg) if msg else ""
|
|
||||||
rows.append({
|
for line in lines:
|
||||||
"Role": role,
|
line = line.strip()
|
||||||
"Content": msg_content,
|
if line.startswith('|') and '|' in line[1:]:
|
||||||
})
|
in_table = True
|
||||||
|
# Skip separator line (|---|---| or |:---:|:---:| etc.)
|
||||||
|
# Check if line only contains |, -, :, and whitespace
|
||||||
|
cleaned = line.replace(' ', '').replace('|', '').replace('-', '').replace(':', '')
|
||||||
|
if cleaned == '':
|
||||||
|
continue # Skip separator line
|
||||||
|
table_lines.append(line)
|
||||||
|
elif in_table and not line.startswith('|'):
|
||||||
|
# End of table
|
||||||
|
break
|
||||||
|
|
||||||
|
if table_lines:
|
||||||
|
# Parse the markdown table
|
||||||
|
rows = []
|
||||||
|
headers = None
|
||||||
|
|
||||||
|
for line in table_lines:
|
||||||
|
# Split by | and clean up
|
||||||
|
cells = [cell.strip() for cell in line.split('|')]
|
||||||
|
# Drop empty strings produced by the split (this also removes blank cells inside a row)
|
||||||
|
cells = [c for c in cells if c]
|
||||||
|
|
||||||
|
if headers is None:
|
||||||
|
headers = cells
|
||||||
|
else:
|
||||||
|
rows.append(cells)
|
||||||
|
|
||||||
|
if headers and rows:
|
||||||
|
# Ensure all rows have same number of columns as headers
|
||||||
|
normalized_rows = []
|
||||||
|
for row in rows:
|
||||||
|
while len(row) < len(headers):
|
||||||
|
row.append('')
|
||||||
|
normalized_rows.append(row[:len(headers)])
|
||||||
|
|
||||||
|
df = pd.DataFrame(normalized_rows, columns=headers)
|
||||||
|
|
||||||
# Add current message if not already in history
|
# Fallback: if no table found, create single column with content
|
||||||
if content and (not rows or rows[-1].get("Content") != content):
|
if df is None or df.empty:
|
||||||
rows.append({
|
df = pd.DataFrame({"Content": [content if content else ""]})
|
||||||
"Role": "assistant",
|
|
||||||
"Content": content,
|
|
||||||
})
|
|
||||||
|
|
||||||
if rows:
|
|
||||||
df = pd.DataFrame(rows)
|
|
||||||
else:
|
|
||||||
df = pd.DataFrame({"Role": ["assistant"], "Content": [content if content else ""]})
|
|
||||||
|
|
||||||
# Write to Excel
|
# Write to Excel
|
||||||
excel_io = BytesIO()
|
excel_io = BytesIO()
|
||||||
with pd.ExcelWriter(excel_io, engine='openpyxl') as writer:
|
with pd.ExcelWriter(excel_io, engine='openpyxl') as writer:
|
||||||
df.to_excel(writer, sheet_name="Conversation", index=False)
|
df.to_excel(writer, sheet_name="Data", index=False)
|
||||||
|
|
||||||
excel_io.seek(0)
|
excel_io.seek(0)
|
||||||
binary_content = excel_io.read()
|
binary_content = excel_io.read()
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue