Commit 61469b06 by nzy

step3: merge into one script & remove an unused import in step2

parent ecf95207
@@ -4,7 +4,7 @@
 # 2. Using SFT (Supervised Fine-Tuning) directly
 # This experiment aims to fairly compare these two approaches.
-from utils import load_json, save_json, read_config
+from utils import load_json, read_config
 from utils_dataset import mk_critic_qa, mk_critic_verify, mk_sft_item, mk_sft_dataset_info, save_dataset
...
import argparse
from pathlib import Path

orm_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: rm
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: {output_dir}
logging_steps: 10
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""

def mk_orm_train_config(model_path, dataset_name, output_dir, deepspeed):
    train_str = orm_yaml.format(
        model_path=model_path,
        dataset_name=dataset_name,
        output_dir=output_dir,
        deepspeed_config_path=deepspeed,
    )
    return train_str

test_yaml = """\
model_name_or_path: {orm_model_path}
template: deepseekcoder
stage: rm
"""

def mk_orm_test_config(model_path):
    test_str = test_yaml.format(orm_model_path=model_path)
    return test_str

sft_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: sft
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
mask_history: true
### output
output_dir: {output_dir}
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""

def mk_sft_train_config(model_path, dataset_name, output_dir, deepspeed):
    train_str = sft_yaml.format(
        model_path=model_path,
        dataset_name=dataset_name,
        output_dir=output_dir,
        deepspeed_config_path=deepspeed,
    )
    return train_str


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--output_dir", type=str)
    parser.add_argument("--deepspeed", type=str)
    parser.add_argument("--type", type=str, choices=["orm", "sft"])
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    model_output_dir = (output_dir / "model").absolute().as_posix()

    if args.type == "sft":
        train_yaml = mk_sft_train_config(
            args.model,
            args.dataset,
            model_output_dir,
            args.deepspeed,
        )
        with open(output_dir / "train.yaml", "w") as f:
            f.write(train_yaml)
    elif args.type == "orm":
        train_yaml = mk_orm_train_config(
            args.model,
            args.dataset,
            model_output_dir,
            args.deepspeed,
        )
        with open(output_dir / "train.yaml", "w") as f:
            f.write(train_yaml)
        test_yaml = mk_orm_test_config(model_output_dir)
        with open(output_dir / "test.yaml", "w") as f:
            f.write(test_yaml)
    else:
        raise NotImplementedError(f"Unknown training type: {args.type}")
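
For orientation, here is a minimal sketch of driving the builders above directly, bypassing the CLI; every concrete model, dataset, and path value below is a hypothetical placeholder, not something taken from this commit.

# Hedged usage sketch: exercises mk_orm_train_config / mk_orm_test_config
# as defined above. All concrete names and paths are hypothetical.
from pathlib import Path

out = Path("runs/orm_experiment")          # hypothetical output directory
out.mkdir(parents=True, exist_ok=True)
model_dir = (out / "model").absolute().as_posix()

train_cfg = mk_orm_train_config(
    "deepseek-ai/deepseek-coder-6.7b-instruct",  # hypothetical base model
    "my_preference_dataset",                     # hypothetical dataset name
    model_dir,
    "configs/ds_z3.json",                        # hypothetical DeepSpeed config
)
(out / "train.yaml").write_text(train_cfg)
(out / "test.yaml").write_text(mk_orm_test_config(model_dir))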
from utils import read_config
train_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: rm
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: {orm_model_path}
logging_steps: 10
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""
test_yaml = """\
model_name_or_path: {orm_model_path}
template: {model_template}
stage: rm
"""

def mk_llamafactory_orm_yaml(cfg):
    orm_dataset = cfg["orm_dataset"]
    orm_cfg = cfg["orm"][orm_dataset]
    data_cfg = cfg["preference_dataset"][orm_dataset]

    with open(orm_cfg["train_yaml_path"], "w") as f:
        train_str = train_yaml.format(
            model_path=cfg["model"],
            dataset_name=data_cfg["dataset_name"],
            orm_model_path=orm_cfg["model_path"],
            deepspeed_config_path=orm_cfg["deepspeed_cfg_path"],
        )
        f.write(train_str)

    with open(orm_cfg["test_yaml_path"], "w") as f:
        test_str = test_yaml.format(
            orm_model_path=orm_cfg["model_path"],
            model_template=cfg["llamafactory_model_template"],
        )
        f.write(test_str)


if __name__ == "__main__":
    cfg = read_config(["orm_dataset"])
    mk_llamafactory_orm_yaml(cfg)
\ No newline at end of file
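
For reference, a hedged reconstruction of the config dict shape this pre-merge script expects, inferred from the key accesses in mk_llamafactory_orm_yaml; every concrete value is a hypothetical placeholder.

# Hedged reconstruction of the cfg layout mk_llamafactory_orm_yaml reads.
cfg = {
    "model": "deepseek-ai/deepseek-coder-6.7b-instruct",  # hypothetical
    "llamafactory_model_template": "deepseekcoder",
    "orm_dataset": "my_prefs",                            # selects the sub-configs
    "preference_dataset": {
        "my_prefs": {"dataset_name": "my_prefs_rm"},      # hypothetical
    },
    "orm": {
        "my_prefs": {
            "train_yaml_path": "configs/orm_train.yaml",  # hypothetical
            "test_yaml_path": "configs/orm_test.yaml",    # hypothetical
            "model_path": "runs/orm/model",               # hypothetical
            "deepspeed_cfg_path": "configs/ds_z3.json",   # hypothetical
        },
    },
}
mk_llamafactory_orm_yaml(cfg)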
from utils import read_config
train_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: sft
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
mask_history: true
### output
output_dir: {critic_model_path}
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""

def mk_llamafactory_sft_yaml(cfg):
    model_type = cfg["model_type"]
    with open(cfg[model_type]["train"]["train_yaml_path"], "w") as f:
        train_str = train_yaml.format(
            model_path=cfg["model"],
            deepspeed_config_path=cfg[model_type]["train"]["deepspeed_cfg_path"],
            dataset_name=cfg[model_type]["dataset_name"],
            critic_model_path=cfg[model_type]["model_path"],
        )
        f.write(train_str)


if __name__ == "__main__":
    cfg = read_config(["model_type"])
    mk_llamafactory_sft_yaml(cfg)
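
Likewise, a hedged reconstruction of the cfg shape mk_llamafactory_sft_yaml reads; the "critic" model_type and all concrete values are hypothetical placeholders.

# Hedged reconstruction of the cfg layout mk_llamafactory_sft_yaml reads.
cfg = {
    "model": "deepseek-ai/deepseek-coder-6.7b-instruct",  # hypothetical
    "model_type": "critic",                               # selects the sub-config
    "critic": {
        "dataset_name": "critic_sft",                     # hypothetical
        "model_path": "runs/critic/model",                # hypothetical
        "train": {
            "train_yaml_path": "configs/sft_train.yaml",  # hypothetical
            "deepspeed_cfg_path": "configs/ds_z3.json",   # hypothetical
        },
    },
}
mk_llamafactory_sft_yaml(cfg)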