Commit 64012d3d by nzy

step2: remove read_config

parent cabb00c5
@@ -3,8 +3,8 @@
 # 1. Using reward loss
 # 2. Using SFT (Supervised Fine-Tuning) directly
 # This experiment aims to fairly compare these two approaches.
-from utils import load_json, read_config
+import argparse
+from utils import load_json
 from utils_dataset import mk_critic_qa, mk_critic_verify, mk_sft_item, mk_sft_dataset_info, save_dataset
@@ -19,14 +19,17 @@ def convert_preference_to_sft(item):
 if __name__ == "__main__":
-    cfg = read_config()
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--preference_dataset", type=str)
+    parser.add_argument("--llamafactory", type=str)
+    parser.add_argument("--dataset_name", type=str)
+    args = parser.parse_args()
-    preference_path = cfg["preference_dataset"]["min_edit_distance"]["preference_dataset_path"]
-    preference_dataset = load_json(preference_path)
+    preference_dataset = load_json(args.preference_dataset)
     sft_dataset = []
     for item in preference_dataset:
         sft_dataset.extend(convert_preference_to_sft(item))
-    dataset_info = mk_sft_dataset_info(cfg["sftorm"]["dataset_name"])
-    save_dataset(cfg["llamafactory_path"], dataset_info, sft_dataset)
+    dataset_info = mk_sft_dataset_info(args.dataset_name)
+    save_dataset(args.llamafactory, dataset_info, sft_dataset)
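
With read_config gone, the converter takes its inputs on the command line instead of a config file. A hypothetical invocation for reference (the script name and paths below are placeholders, not from this commit; only the three flags come from the diff):

    python convert_preference_to_sft.py \
        --preference_dataset path/to/preference.json \
        --llamafactory path/to/LLaMA-Factory \
        --dataset_name my_sft_dataset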