Update MSC dataset (#208)

jsonl
This commit is contained in:
Preston Rasmussen 2024-11-06 11:00:06 -05:00 committed by GitHub
parent 3199e893ed
commit c266f55b88
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -31,8 +31,8 @@ def parse_msc_messages() -> list[list[ParsedMscMessage]]:
msc_messages: list[list[ParsedMscMessage]] = []
speakers = ['Alice', 'Bob']
-with open('../data/msc.json') as file:
-data = json.load(file)['data']
+with open('../data/msc.jsonl') as file:
+data = [json.loads(line) for line in file]
for i, conversation in enumerate(data):
messages: list[ParsedMscMessage] = []
for previous_dialog in conversation['previous_dialogs']:
@@ -73,8 +73,8 @@ def parse_msc_messages() -> list[list[ParsedMscMessage]]:
def conversation_q_and_a() -> list[tuple[str, str]]:
-with open('../data/msc.json') as file:
-data = json.load(file)['data']
+with open('../data/msc.jsonl') as file:
+data = [json.loads(line) for line in file]
qa: list[tuple[str, str]] = []
for conversation in data: