#!/usr/bin/env bash
# Shell options and path configuration for the "algolr balance" SFT run.
#   -e  abort on the first command that exits non-zero
#   -u  abort on expansion of an unset variable (catches typos in the names below)
#   -x  trace every command as it is executed
set -eux

# Base checkpoint, project root, and the short model tag used in file names.
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"
modelname="qwen25_coder_inst"
data="${project}/data"

# Per-model APPS train/test files. Only ${testset} is used by the active steps;
# ${trainset} is referenced solely by the commented-out algolr step.
trainset="${data}/train/${modelname}-apps-train.jsonl"
testset="${data}/test/${modelname}-apps-test.jsonl"

# Referenced only by the commented-out algolr step (--pairinfo).
train_selected_pairs="${data}/train/${modelname}-apps-train-selected_pairs.jsonl"

# APPS dataset directory; referenced only by the commented-out algolr step (--apps).
apps="/lustre/S/nanziyuan/datasets/apps/"
# sft="${data}/train/${modelname}-sft.jsonl"
# Balanced SFT dataset passed to the training step as --dataset.
sft="${data}/train/${modelname}-sft-balanced.jsonl"

# Where the fine-tuned model is saved; the evaluation step loads it from here.
ftmodel="${project}/model/${modelname}_7b-algolr_balance_epoch3_bs32"
# NOTE(review): the prefix here is "qwen25_code_inst" (no "r"), unlike
# ${modelname}. It looks like a typo, but the path is kept unchanged so that
# existing result files and downstream consumers are not moved -- confirm.
evalresults="${data}/eval/qwen25_code_inst-apps-test-algolr-balance_epoch3_bs32.jsonl"

# export CUDA_VISIBLE_DEVICES=0,1,2,3


# python -m codecritic.cli.algolr \
#        --model ${model} \
#        --dataset ${trainset} \
#        --pairinfo ${train_selected_pairs} \
#        --apps ${apps} \
#        --output ${sft} \
#        --level beginner \
#        --tp 1


# Training step: run the openrlhf.cli.train_sft module through the deepspeed
# launcher. ${sft} is passed as --dataset, ${model} as --pretrain and
# ${ftmodel} as --save_path; TensorBoard output is directed to "${ftmodel}_log".
# The "epoch3_bs32" suffix in ${ftmodel} mirrors --max_epochs 3 and
# --train_batch_size 32 below -- keep them in sync when changing either.
# All expansions are quoted so a path containing spaces or glob characters
# is still passed as a single argument.
deepspeed --module \
openrlhf.cli.train_sft \
   --max_len 5632 \
   --dataset "${sft}" \
   --input_key question \
   --output_key response \
   --apply_chat_template \
   --train_batch_size 32 \
   --micro_train_batch_size 1 \
   --max_samples 500000 \
   --pretrain "${model}" \
   --save_path "${ftmodel}" \
   --save_steps -1 \
   --logging_steps 1 \
   --eval_steps -1 \
   --zero_stage 3 \
   --max_epochs 3 \
   --bf16 \
   --flash_attn \
   --learning_rate 5e-6 \
   --gradient_checkpointing \
   --use_tensorboard "${ftmodel}_log"


# Evaluation step: run codecritic.cli.test_genrm with the model saved at
# ${ftmodel}, passing ${testset} as --testset and ${evalresults} as --output.
# Because the script enables `set -e`, this is reached only if the preceding
# training command exited successfully.
# Expansions are quoted so each path is always passed as exactly one argument.
python -m codecritic.cli.test_genrm \
       --model "${ftmodel}" \
       --testset "${testset}" \
       --output "${evalresults}" \
       --reasoning \
       --tp 1
