Commit a89b519a by nanziyuan

refactor: large-scale project structure refactor (WIP)

- Reorganized project structure for better modularity and maintainability.
- Note: This commit is a work in progress and is not yet functional. Further adjustments to imports and code logic are required to make the project runnable.
parent b3bf4ddf
@@ -164,4 +164,4 @@ cython_debug/
 readme.pdf
 *.json
 *.jsonl
-test_*
\ No newline at end of file
+test/
"""Generate YAML training configs for ORM (reward model) and SFT runs."""

import argparse
from pathlib import Path

# Training config template for the reward model (ORM); the {placeholders}
# are filled in by mk_orm_train_config below.
orm_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: rm
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: {output_dir}
logging_steps: 10
save_steps: 100
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""


def mk_orm_train_config(model_path, dataset_name, output_dir, deepspeed):
    train_str = orm_yaml.format(
        model_path=model_path,
        dataset_name=dataset_name,
        output_dir=output_dir,
        deepspeed_config_path=deepspeed,
    )
    return train_str


# Minimal config pointing at the trained ORM checkpoint; written out as test.yaml.
test_yaml = """\
model_name_or_path: {orm_model_path}
template: deepseekcoder
stage: rm
"""


def mk_orm_test_config(model_path):
    test_str = test_yaml.format(orm_model_path=model_path)
    return test_str


# Training config template for supervised fine-tuning (SFT); the {placeholders}
# are filled in by mk_sft_train_config below.
sft_yaml = """\
### model
model_name_or_path: {model_path}
### method
stage: sft
do_train: true
finetuning_type: full
deepspeed: {deepspeed_config_path}
### dataset
dataset: {dataset_name}
template: deepseekcoder
cutoff_len: 4096
max_samples: 10000
overwrite_cache: true
preprocessing_num_workers: 16
mask_history: true
### output
output_dir: {output_dir}
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
"""


def mk_sft_train_config(model_path, dataset_name, output_dir, deepspeed):
    train_str = sft_yaml.format(
        model_path=model_path,
        dataset_name=dataset_name,
        output_dir=output_dir,
        deepspeed_config_path=deepspeed,
    )
    return train_str


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate YAML training configs for ORM or SFT runs."
    )
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--dataset", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument("--deepspeed", type=str, required=True)
    parser.add_argument("--type", type=str, choices=["orm", "sft"], required=True)
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # Trained checkpoints are written to <output_dir>/model.
    model_output_dir = (output_dir / "model").absolute().as_posix()

    if args.type == "sft":
        train_yaml = mk_sft_train_config(
            args.model,
            args.dataset,
            model_output_dir,
            args.deepspeed,
        )
        with open(output_dir / "train.yaml", "w") as f:
            f.write(train_yaml)
    elif args.type == "orm":
        train_yaml = mk_orm_train_config(
            args.model,
            args.dataset,
            model_output_dir,
            args.deepspeed,
        )
        with open(output_dir / "train.yaml", "w") as f:
            f.write(train_yaml)
        # The test config points at the checkpoint that training will produce.
        test_yaml = mk_orm_test_config(model_output_dir)
        with open(output_dir / "test.yaml", "w") as f:
            f.write(test_yaml)
    else:
        raise NotImplementedError(f"Unknown training type: {args.type}")
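
For reference, a minimal sketch of using the SFT generator programmatically, assuming the functions above are in scope; the model, dataset, and DeepSpeed paths here are placeholders, not values from this repository:

from pathlib import Path

out = Path("runs/sft_demo")
out.mkdir(parents=True, exist_ok=True)
cfg = mk_sft_train_config(
    "models/deepseek-coder-base",           # placeholder model path
    "my_sft_dataset",                       # placeholder dataset name
    (out / "model").absolute().as_posix(),  # where checkpoints would be written
    "configs/ds_zero2.json",                # placeholder DeepSpeed config
)
(out / "train.yaml").write_text(cfg)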