#!/usr/bin/env bash
# Shared configuration for the codecritic commands in this script: every
# command below takes its input/output locations from these variables.
#
#   -e  abort on the first command that exits non-zero
#   -u  abort on expansion of an unset variable (catches misspelled names)
#   -x  trace each command, so the log shows the fully expanded paths
set -eux

# Model directory handed to the commands below via --model / --pretrain.
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
# Project root; all data and fine-tuned model paths are derived from it.
project="/lustre/S/nanziyuan/projects/ccc"
# Tag embedded in the data file names under ${project}/data.
modelname="qwen25_coder_inst"

trainset="${project}/data/train/${modelname}-apps-train.jsonl"
# Defined once: the original assigned this same path a second time further
# down as a hard-coded literal.
testset="${project}/data/test/${modelname}-apps-test.jsonl"

train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"

apps="/lustre/S/nanziyuan/datasets/apps/"
sft="${project}/data/train/${modelname}-sft.jsonl"

ftmodel="${project}/model/qwen25_coder_inst_7b-algolr"
# NOTE(review): this file name spells the tag "qwen25_code_inst" while
# ${modelname} is "qwen25_coder_inst". The literal is kept unchanged so the
# results land where they always have -- confirm whether it is a typo.
evalresults="${project}/data/eval/qwen25_code_inst-apps-test-genrm-score.jsonl"

# Make GPUs 0-7 visible to the child processes started below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7


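# Disabled step (kept for reference): codecritic.cli.algolr invocation that
# would write its output to ${sft}. Uncomment to re-run it.
# python -m codecritic.cli.algolr \
#        --model ${model} \
#        --dataset ${trainset} \
#        --pairinfo ${train_selected_pairs} \
#        --apps ${apps} \
#        --output ${sft} \
#        --level beginner \
#        --tp 1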


# Disabled step (kept for reference): OpenRLHF SFT run that would train from
# ${model} on ${sft} and save to ${ftmodel}. Uncomment to re-run it.
# deepspeed --module \
# openrlhf.cli.train_sft \
#    --max_len 4096 \
#    --dataset ${sft} \
#    --input_key question \
#    --output_key response \
#    --apply_chat_template \
#    --train_batch_size 256 \
#    --micro_train_batch_size 2 \
#    --max_samples 500000 \
#    --pretrain ${model} \
#    --save_path ${ftmodel} \
#    --save_steps -1 \
#    --logging_steps 1 \
#    --eval_steps -1 \
#    --zero_stage 2 \
#    --max_epochs 1 \
#    --bf16 \
#    --flash_attn \
#    --learning_rate 5e-6 \
#    --load_checkpoint \
#    --gradient_checkpointing \
#    --use_tensorboard "${ftmodel}_log"


# Run codecritic.cli.test_genrm on ${testset} with --reasoning enabled,
# passing ${evalresults} as the output location.
#
# Every expansion is double-quoted so that a path containing whitespace or
# glob characters reaches the program as exactly one argument.
#
# NOTE(review): this passes the base ${model}; ${ftmodel} is defined above but
# not used here -- confirm which model is meant to be evaluated.
# NOTE(review): --tp is presumably the tensor-parallel size -- verify against
# the codecritic CLI.
python -m codecritic.cli.test_genrm \
       --model "${model}" \
       --testset "${testset}" \
       --output "${evalresults}" \
       --reasoning \
       --tp 1
