# Trace each command as it runs (-x) and abort on the first failure (-e).
set -e -x

# Value passed as --model to the codecritic commands in this script.
model='/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-32B-Instruct'
# Project root; data and model paths below are derived from it.
project='/lustre/S/nanziyuan/projects/ccc'
# Short tag embedded in every generated file name.
modelname='qwen25_coder_inst_32b'
# Value passed as --tp to the codecritic commands in this script.
tp=4

# Value passed as --apps to the codecritic commands in this script.
apps='/lustre/S/nanziyuan/datasets/apps/'
# All dataset files live under the project's data directory.
data="${project}/data"
trainset="${data}/train/${modelname}-apps-train.jsonl"
testset="${data}/test/${modelname}-apps-test.jsonl"

# Files derived from the training set: selected pairs and the reward dataset.
train_selected_pairs="${data}/train/${modelname}-apps-train-selected_pairs.jsonl"
reward_ds="${data}/train/${modelname}-apps-train-reward_dataset.jsonl"

# Run codecritic.cli.gen_dataset with the model, the APPS path and the
# train/test JSONL paths (presumably the files this step writes -- confirm
# against the CLI).
# Every expansion is double-quoted so a value containing whitespace or glob
# characters is passed as exactly one argument.
python -m codecritic.cli.gen_dataset \
    --model "${model}" \
    --apps "${apps}" \
    --train "${trainset}" \
    --test "${testset}" \
    --tp "${tp}"

# Run codecritic.cli.select_preference_pairs on the training set, with the
# selected-pairs JSONL path given as --output.
# Expansions are double-quoted so each path stays a single argument even if
# it contains whitespace or glob characters.
python -m codecritic.cli.select_preference_pairs \
       --dataset "${trainset}" \
       --output "${train_selected_pairs}"

# Run codecritic.cli.reformat with --format reward on the training set and
# the selected pairs, with the reward dataset path given as --output.
# Expansions are double-quoted so each path stays a single argument even if
# it contains whitespace or glob characters.
python -m codecritic.cli.reformat \
       --dataset "${trainset}" \
       --pairs "${train_selected_pairs}" \
       --format reward \
       --output "${reward_ds}"


# Paths for the algolr stage:
#   sft         - file passed as --output to codecritic.cli.algolr
#   ftmodel     - model path referenced by the commented-out SFT training and
#                 test_genrm steps later in this script
#   algolrscore - score file referenced by the commented-out test_genrm step
sft="${data}/train/${modelname}-sft.jsonl"
ftmodel="${project}/model/${modelname}-algolr"
# NOTE: this must be ${modelname}; the previous "${$modelname}" is rejected by
# bash as a bad substitution and aborted the script before the algolr step.
algolrscore="${data}/eval/${modelname}-apps-test-algolr-score.jsonl"

# Run codecritic.cli.algolr with the model, the training set, the selected
# pairs (--pairinfo) and the APPS path, at --level beginner; the SFT file path
# is given as --output.
# Expansions are double-quoted so each value stays a single argument even if
# it contains whitespace or glob characters.
python -m codecritic.cli.algolr \
       --model "${model}" \
       --dataset "${trainset}" \
       --pairinfo "${train_selected_pairs}" \
       --apps "${apps}" \
       --output "${sft}" \
       --level beginner \
       --tp "${tp}"


# deepspeed --module \
# openrlhf.cli.train_sft \
#    --max_len 4096 \
#    --dataset ${sft} \
#    --input_key question \
#    --output_key response \
#    --apply_chat_template \
#    --train_batch_size 128 \
#    --micro_train_batch_size 1 \
#    --max_samples 500000 \
#    --pretrain ${model} \
#    --save_path ${ftmodel} \
#    --save_steps -1 \
#    --logging_steps 1 \
#    --eval_steps -1 \
#    --zero_stage 3 \
#    --beta 0.1 \
#    --max_epochs 1 \
#    --bf16 \
#    --flash_attn \
#    --learning_rate 5e-6 \
#    --load_checkpoint \
#    --gradient_checkpointing \
#    --adam_offload \
#    --use_tensorboard "${ftmodel}_log"


# python -m codecritic.cli.test_genrm \
#        --model ${ftmodel} \
#        --testset ${testset} \
#        --output ${algolrscore} \
#        --reasoning \
#        --tp 1

# # ORM
# ormmodel="${project}/model/${modelname}-orm"
# ormscore="${data}/eval/${modelname}-apps-test-orm-score.jsonl"

# deepspeed --module \
# openrlhf.cli.train_rm \
#    --save_path ${ormmodel} \
#    --save_steps -1 \
#    --logging_steps 1 \
#    --eval_steps -1 \
#    --train_batch_size 128 \
#    --micro_train_batch_size 1 \
#    --pretrain ${model} \
#    --bf16 \
#    --max_epochs 1 \
#    --max_len 8192 \
#    --zero_stage 3 \
#    --beta 0.1 \
#    --learning_rate 9e-6 \
#    --dataset  ${reward_ds} \
#    --apply_chat_template \
#    --prompt_key messages \
#    --chosen_key chosen \
#    --rejected_key rejected \
#    --flash_attn \
#    --gradient_checkpointing \
#    --adam_offload \
#    --use_tensorboard "${ormmodel}_log"


# start_server() {
#     echo "Starting server..."
#     CUDA_VISIBLE_DEVICES=0,1,2,3 \
#         python -m openrlhf.cli.serve_rm \
#         --reward_pretrain ${ormmodel} \
#         --normalize_reward \
#         --port 5000 \
#         --bf16 \
#         --max_len 8192 &
#     SERVER_PID=$!
#     echo "Server started with PID: $SERVER_PID"
# }

# # Function to start the client
# start_client() {
#     echo "Starting client..."
#     python -m codecritic.cli.test_orm \
#            --model ${ftmodel} \
#            --testset ${testset} \
#            --output ${ormscore}
#     CLIENT_EXIT_CODE=$?
#     echo "Client finished with exit code: $CLIENT_EXIT_CODE"
# }

# # Function to stop the server
# stop_server() {
#     echo "Stopping server..."
#     kill -SIGINT $SERVER_PID
#     wait $SERVER_PID 2>/dev/null
#     echo "Server stopped."
# }

# start_server
# # Give the server some time to initialize (optional)
# sleep 600
# start_client
# stop_server
# echo "Execution complete."
