Commit 97aca067 by nanziyuan

update scripts: convert pairwise reward dataset to sft dataset

parent e86afa63
@@ -4,31 +4,40 @@
 # 2. Using SFT (Supervised Fine-Tuning) directly
 # This experiment aims to fairly compare these two approaches.
 import argparse

-from codecritic.data.utils import mk_message, mk_messages, save_jsonl_dataset
-from codecritic.utils.json import load_json
+from codecritic.data.utils import mk_message, save_jsonl_dataset
+from codecritic.utils.json import load_jsonl
 from codecritic.data.verify import mk_critic_verify


+def mk_sft_dataset(messages):
+    assert len(messages) == 4
+    question = messages[:3]
+    response = messages[3:]
+    return dict(question=question, response=response)
+
+
 def convert_preference_to_sft(item):
     message = item["messages"][0]["content"]
-    chosen = item["chosen"]["content"]
-    rejected = item["rejected"]["content"]
+    chosen = item["chosen"][0]["content"]
+    rejected = item["rejected"][0]["content"]
     messages1 = mk_message(message, chosen) + mk_critic_verify(True)
     messages2 = mk_message(message, rejected) + mk_critic_verify(False)
-    return mk_messages(messages1), mk_messages(messages2)
+    return mk_sft_dataset(messages1), mk_sft_dataset(messages2)


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--preference_dataset", type=str)
     parser.add_argument("--output_dir", type=str)
+    parser.add_argument("--split", type=str, default="train")
     args = parser.parse_args()

-    preference_dataset = load_json(args.preference_dataset)
+    preference_dataset = load_jsonl(args.preference_dataset)
     sft_dataset = []
     for item in preference_dataset:
         sft_dataset.extend(convert_preference_to_sft(item))
-    save_jsonl_dataset(sft_dataset, args.output_dir)
+    save_jsonl_dataset(sft_dataset, args.output_dir, args.split)
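
For reference, a minimal sketch of the data shapes this conversion assumes. The helper bodies below (mk_message, mk_critic_verify) are stand-ins inferred from how the script uses them, not the actual codecritic implementations, and the sample preference record is hypothetical: mk_message appears to build a two-turn chat (user question, assistant answer) and mk_critic_verify appears to append a verification turn pair, so mk_sft_dataset receives four messages and splits them into a three-message question and a one-message response.

# Sketch only -- assumed stand-ins for the codecritic helpers, plus a
# hypothetical preference record, to illustrate the conversion.

def mk_message(question, answer):
    # Assumed: a two-turn chat -- user question, assistant answer.
    return [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]

def mk_critic_verify(is_correct):
    # Assumed: a verification turn pair appended after the answer.
    return [
        {"role": "user", "content": "Is the above solution correct?"},
        {"role": "assistant", "content": "Yes" if is_correct else "No"},
    ]

def mk_sft_dataset(messages):
    # Mirrors the committed helper: 3 question messages + 1 response message.
    assert len(messages) == 4
    return dict(question=messages[:3], response=messages[3:])

# One JSONL line of the preference dataset (hypothetical values):
item = {
    "messages": [{"role": "user", "content": "Write a function that adds two numbers."}],
    "chosen": [{"role": "assistant", "content": "def add(a, b):\n    return a + b"}],
    "rejected": [{"role": "assistant", "content": "def add(a, b):\n    return a - b"}],
}

question = item["messages"][0]["content"]
chosen = item["chosen"][0]["content"]
rejected = item["rejected"][0]["content"]

# Each preference pair yields two SFT records: the chosen answer labeled
# correct and the rejected answer labeled incorrect.
sft_pos = mk_sft_dataset(mk_message(question, chosen) + mk_critic_verify(True))
sft_neg = mk_sft_dataset(mk_message(question, rejected) + mk_critic_verify(False))

print(sft_pos["response"])  # [{'role': 'assistant', 'content': 'Yes'}]
print(sft_neg["response"])  # [{'role': 'assistant', 'content': 'No'}]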