Commit 61469b06 by nzy

step3: merge into one script & remove an unused import in step2

parent ecf95207
@@ -4,7 +4,7 @@
 # 2. Using SFT (Supervised Fine-Tuning) directly
 # This experiment aims to fairly compare these two approaches.
-from utils import load_json, save_json, read_config
+from utils import load_json, read_config
 from utils_dataset import mk_critic_qa, mk_critic_verify, mk_sft_item, mk_sft_dataset_info, save_dataset
...
import argparse
from pathlib import Path

orm_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: rm
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: {output_dir}
logging_steps: 10
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""

def mk_orm_train_config(model_path, dataset_name, output_dir, deepspeed):
    train_str = orm_yaml.format(
        model_path=model_path,
        dataset_name=dataset_name,
        output_dir=output_dir,
        deepspeed_config_path=deepspeed,
    )
    return train_str

test_yaml = """\
model_name_or_path: {orm_model_path}
template: deepseekcoder
stage: rm
"""

def mk_orm_test_config(model_path):
    test_str = test_yaml.format(orm_model_path=model_path)
    return test_str

sft_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: sft
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
mask_history: true
### output
output_dir: {output_dir}
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""

def mk_sft_train_config(model_path, dataset_name, output_dir, deepspeed):
    train_str = sft_yaml.format(
        model_path=model_path,
        dataset_name=dataset_name,
        output_dir=output_dir,
        deepspeed_config_path=deepspeed,
    )
    return train_str


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--output_dir", type=str)
    parser.add_argument("--deepspeed", type=str)
    parser.add_argument("--type", type=str, choices=["orm", "sft"])
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    model_output_dir = (output_dir / "model").absolute().as_posix()

    if args.type == "sft":
        train_yaml = mk_sft_train_config(
            args.model,
            args.dataset,
            model_output_dir,
            args.deepspeed,
        )
        with open(output_dir / "train.yaml", "w") as f:
            f.write(train_yaml)
    elif args.type == "orm":
        train_yaml = mk_orm_train_config(
            args.model,
            args.dataset,
            model_output_dir,
            args.deepspeed,
        )
        with open(output_dir / "train.yaml", "w") as f:
            f.write(train_yaml)
        test_yaml = mk_orm_test_config(model_output_dir)
        with open(output_dir / "test.yaml", "w") as f:
            f.write(test_yaml)
    else:
        raise NotImplementedError(f"Unknown training type: {args.type}")
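
For orientation, here is a minimal sketch of driving the builders above directly, bypassing the CLI; every concrete model, dataset, and path value below is a hypothetical placeholder, not something taken from this commit.

# Hedged usage sketch: exercises mk_orm_train_config / mk_orm_test_config
# as defined above. All concrete names and paths are hypothetical.
from pathlib import Path

out = Path("runs/orm_experiment")          # hypothetical output directory
out.mkdir(parents=True, exist_ok=True)
model_dir = (out / "model").absolute().as_posix()

train_cfg = mk_orm_train_config(
    "deepseek-ai/deepseek-coder-6.7b-instruct",  # hypothetical base model
    "my_preference_dataset",                     # hypothetical dataset name
    model_dir,
    "configs/ds_z3.json",                        # hypothetical DeepSpeed config
)
(out / "train.yaml").write_text(train_cfg)
(out / "test.yaml").write_text(mk_orm_test_config(model_dir))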
from utils import read_config
train_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: rm
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: {orm_model_path}
logging_steps: 10
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""
test_yaml = """\
model_name_or_path: {orm_model_path}
template: {model_template}
stage: rm
"""

def mk_llamafactory_orm_yaml(cfg):
    orm_dataset = cfg["orm_dataset"]
    orm_cfg = cfg["orm"][orm_dataset]
    data_cfg = cfg["preference_dataset"][orm_dataset]

    with open(orm_cfg["train_yaml_path"], "w") as f:
        train_str = train_yaml.format(
            model_path=cfg["model"],
            dataset_name=data_cfg["dataset_name"],
            orm_model_path=orm_cfg["model_path"],
            deepspeed_config_path=orm_cfg["deepspeed_cfg_path"],
        )
        f.write(train_str)

    with open(orm_cfg["test_yaml_path"], "w") as f:
        test_str = test_yaml.format(
            orm_model_path=orm_cfg["model_path"],
            model_template=cfg["llamafactory_model_template"],
        )
        f.write(test_str)


if __name__ == "__main__":
    cfg = read_config(["orm_dataset"])
    mk_llamafactory_orm_yaml(cfg)
\ No newline at end of file
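
For reference, a hedged reconstruction of the config dict shape this pre-merge script expects, inferred from the key accesses in mk_llamafactory_orm_yaml; every concrete value is a hypothetical placeholder.

# Hedged reconstruction of the cfg layout mk_llamafactory_orm_yaml reads.
cfg = {
    "model": "deepseek-ai/deepseek-coder-6.7b-instruct",  # hypothetical
    "llamafactory_model_template": "deepseekcoder",
    "orm_dataset": "my_prefs",                            # selects the sub-configs
    "preference_dataset": {
        "my_prefs": {"dataset_name": "my_prefs_rm"},      # hypothetical
    },
    "orm": {
        "my_prefs": {
            "train_yaml_path": "configs/orm_train.yaml",  # hypothetical
            "test_yaml_path": "configs/orm_test.yaml",    # hypothetical
            "model_path": "runs/orm/model",               # hypothetical
            "deepspeed_cfg_path": "configs/ds_z3.json",   # hypothetical
        },
    },
}
mk_llamafactory_orm_yaml(cfg)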
from utils import read_config
train_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: sft
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
mask_history: true
### output
output_dir: {critic_model_path}
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""

def mk_llamafactory_sft_yaml(cfg):
    model_type = cfg["model_type"]
    with open(cfg[model_type]["train"]["train_yaml_path"], "w") as f:
        train_str = train_yaml.format(
            model_path=cfg["model"],
            deepspeed_config_path=cfg[model_type]["train"]["deepspeed_cfg_path"],
            dataset_name=cfg[model_type]["dataset_name"],
            critic_model_path=cfg[model_type]["model_path"],
        )
        f.write(train_str)


if __name__ == "__main__":
    cfg = read_config(["model_type"])
    mk_llamafactory_sft_yaml(cfg)
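
Likewise, a hedged reconstruction of the cfg shape mk_llamafactory_sft_yaml reads; the "critic" model_type and all concrete values are hypothetical placeholders.

# Hedged reconstruction of the cfg layout mk_llamafactory_sft_yaml reads.
cfg = {
    "model": "deepseek-ai/deepseek-coder-6.7b-instruct",  # hypothetical
    "model_type": "critic",                               # selects the sub-config
    "critic": {
        "dataset_name": "critic_sft",                     # hypothetical
        "model_path": "runs/critic/model",                # hypothetical
        "train": {
            "train_yaml_path": "configs/sft_train.yaml",  # hypothetical
            "deepspeed_cfg_path": "configs/ds_z3.json",   # hypothetical
        },
    },
}
mk_llamafactory_sft_yaml(cfg)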