feat: Add markdown table parsing for XLSX output.

This commit is contained in:
shivamjohri247 2025-12-12 00:09:39 +05:30
parent 4cc984928c
commit 9ab745e1e9

View file

@@ -232,39 +232,66 @@ class Message(ComponentBase):
elif self._param.output_format == "xlsx": elif self._param.output_format == "xlsx":
import pandas as pd import pandas as pd
from io import BytesIO from io import BytesIO
from datetime import datetime import re
# Get all conversation history from canvas # Try to parse markdown table from the content
history = getattr(self._canvas, 'history', []) df = None
# Build rows from conversation history if isinstance(content, str):
rows = [] # Extract markdown table from content
for role, msg in history: # Pattern: lines starting with | and containing |
if isinstance(msg, dict): lines = content.strip().split('\n')
msg_content = msg.get("content", str(msg)) table_lines = []
else: in_table = False
msg_content = str(msg) if msg else ""
rows.append({
"Role": role,
"Content": msg_content,
})
# Add current message if not already in history for line in lines:
if content and (not rows or rows[-1].get("Content") != content): line = line.strip()
rows.append({ if line.startswith('|') and '|' in line[1:]:
"Role": "assistant", in_table = True
"Content": content, # Skip separator line (|---|---| or |:---:|:---:| etc.)
}) # Check if line only contains |, -, :, and whitespace
cleaned = line.replace(' ', '').replace('|', '').replace('-', '').replace(':', '')
if cleaned == '':
continue # Skip separator line
table_lines.append(line)
elif in_table and not line.startswith('|'):
# End of table
break
if rows: if table_lines:
df = pd.DataFrame(rows) # Parse the markdown table
else: rows = []
df = pd.DataFrame({"Role": ["assistant"], "Content": [content if content else ""]}) headers = None
for line in table_lines:
# Split by | and clean up
cells = [cell.strip() for cell in line.split('|')]
# Remove empty first and last elements from split
cells = [c for c in cells if c]
if headers is None:
headers = cells
else:
rows.append(cells)
if headers and rows:
# Ensure all rows have same number of columns as headers
normalized_rows = []
for row in rows:
while len(row) < len(headers):
row.append('')
normalized_rows.append(row[:len(headers)])
df = pd.DataFrame(normalized_rows, columns=headers)
# Fallback: if no table found, create single column with content
if df is None or df.empty:
df = pd.DataFrame({"Content": [content if content else ""]})
# Write to Excel # Write to Excel
excel_io = BytesIO() excel_io = BytesIO()
with pd.ExcelWriter(excel_io, engine='openpyxl') as writer: with pd.ExcelWriter(excel_io, engine='openpyxl') as writer:
df.to_excel(writer, sheet_name="Conversation", index=False) df.to_excel(writer, sheet_name="Data", index=False)
excel_io.seek(0) excel_io.seek(0)
binary_content = excel_io.read() binary_content = excel_io.read()