feat: Add markdown table parsing for XLSX output.

This commit is contained in:
shivamjohri247 2025-12-12 00:09:39 +05:30
parent 4cc984928c
commit 9ab745e1e9

View file

@@ -232,39 +232,66 @@ class Message(ComponentBase):
elif self._param.output_format == "xlsx":
import pandas as pd
from io import BytesIO
from datetime import datetime
import re
# Get all conversation history from canvas
history = getattr(self._canvas, 'history', [])
# Try to parse markdown table from the content
df = None
# Build rows from conversation history
rows = []
for role, msg in history:
if isinstance(msg, dict):
msg_content = msg.get("content", str(msg))
else:
msg_content = str(msg) if msg else ""
rows.append({
"Role": role,
"Content": msg_content,
})
if isinstance(content, str):
# Extract markdown table from content
# Pattern: lines starting with | and containing |
lines = content.strip().split('\n')
table_lines = []
in_table = False
for line in lines:
line = line.strip()
if line.startswith('|') and '|' in line[1:]:
in_table = True
# Skip separator line (|---|---| or |:---:|:---:| etc.)
# Check if line only contains |, -, :, and whitespace
cleaned = line.replace(' ', '').replace('|', '').replace('-', '').replace(':', '')
if cleaned == '':
continue # Skip separator line
table_lines.append(line)
elif in_table and not line.startswith('|'):
# End of table
break
if table_lines:
# Parse the markdown table
rows = []
headers = None
for line in table_lines:
# Split by | and clean up
cells = [cell.strip() for cell in line.split('|')]
# Remove empty first and last elements from split
cells = [c for c in cells if c]
if headers is None:
headers = cells
else:
rows.append(cells)
if headers and rows:
# Ensure all rows have same number of columns as headers
normalized_rows = []
for row in rows:
while len(row) < len(headers):
row.append('')
normalized_rows.append(row[:len(headers)])
df = pd.DataFrame(normalized_rows, columns=headers)
# Add current message if not already in history
if content and (not rows or rows[-1].get("Content") != content):
rows.append({
"Role": "assistant",
"Content": content,
})
if rows:
df = pd.DataFrame(rows)
else:
df = pd.DataFrame({"Role": ["assistant"], "Content": [content if content else ""]})
# Fallback: if no table found, create single column with content
if df is None or df.empty:
df = pd.DataFrame({"Content": [content if content else ""]})
# Write to Excel
excel_io = BytesIO()
with pd.ExcelWriter(excel_io, engine='openpyxl') as writer:
df.to_excel(writer, sheet_name="Conversation", index=False)
df.to_excel(writer, sheet_name="Data", index=False)
excel_io.seek(0)
binary_content = excel_io.read()