# Shell options for the whole script:
#   -x  trace every command as it is executed
#   -e  abort on the first command that exits non-zero
#   -u  treat expansion of an unset variable as an error (catches typos in names)
set -xeu

# Root of the project tree; the paths below that live inside it are derived from it.
project="/lustre/S/nanziyuan/projects/ccc"

# Training data passed to the trainer via --dataset.
dataset="${project}/data/train/qwen25_coder_inst-apps-train-reward_dataset.jsonl"

# Base model passed as --pretrain, and the location the trained model is saved to
# (--save_path) and later loaded from by the server and the client.
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
ftmodel="${project}/model/qwen25_coder_inst_7b-orm"

# Input and output files of the evaluation client.
testset="${project}/data/test/qwen25_coder_inst-apps-test.jsonl"
# NOTE(review): "qwen25_code_inst" here differs from the "qwen25_coder_inst" prefix
# used by the other file names; kept unchanged because other tooling may read this
# exact path -- confirm whether it is intentional.
evalresults="${project}/data/eval/qwen25_code_inst-apps-test-orm-score.jsonl"

# Launch openrlhf.cli.train_rm through the deepspeed launcher.
# Reads ${model} (base checkpoint) and ${dataset} (training data, with the
# prompt/chosen/rejected fields named by the *_key flags), saves the result to
# ${ftmodel} and writes TensorBoard logs next to it in "${ftmodel}_log".
# Every expansion is quoted so a path containing whitespace or glob characters
# is still passed as a single argument.
# Because of `set -e`, a failed training run stops the script before the
# evaluation steps further down.
deepspeed --module \
openrlhf.cli.train_rm \
   --save_path "${ftmodel}" \
   --save_steps -1 \
   --logging_steps 1 \
   --eval_steps -1 \
   --train_batch_size 256 \
   --micro_train_batch_size 1 \
   --pretrain "${model}" \
   --bf16 \
   --max_epochs 1 \
   --max_len 8192 \
   --zero_stage 3 \
   --learning_rate 9e-6 \
   --dataset "${dataset}" \
   --apply_chat_template \
   --prompt_key messages \
   --chosen_key chosen \
   --rejected_key rejected \
   --flash_attn \
   --load_checkpoint \
   --gradient_checkpointing \
   --use_tensorboard "${ftmodel}_log"


#######################################
# Start the reward-model server (openrlhf.cli.serve_rm) as a background job.
# The function returns as soon as the process is launched; it does not wait
# for the server to become ready to accept requests.
# Globals:
#   ftmodel    (read)    - model path passed as --reward_pretrain
#   SERVER_PID (written) - PID of the background server process
# Outputs:
#   Progress messages on stdout.
#######################################
start_server() {
    echo "Starting server..."
    # CUDA_VISIBLE_DEVICES is set for this one command only.
    # The model path is quoted so it is always passed as a single argument.
    CUDA_VISIBLE_DEVICES=0 \
        python -m openrlhf.cli.serve_rm \
        --reward_pretrain "${ftmodel}" \
        --normalize_reward \
        --port 5000 \
        --bf16 \
        --max_len 8192 &
    # $! is the PID of the job just put in the background; stop_server uses it.
    SERVER_PID=$!
    echo "Server started with PID: ${SERVER_PID}"
}

#######################################
# Run the evaluation client (codecritic.cli.test_orm) in the foreground.
# Globals:
#   ftmodel          (read)    - passed as --model
#   testset          (read)    - passed as --testset
#   evalresults      (read)    - passed as --output
#   CLIENT_EXIT_CODE (written) - exit status of the client
# Outputs:
#   Progress messages on stdout.
# Returns:
#   The client's exit status.
#######################################
start_client() {
    echo "Starting client..."
    # Capture the status through `||`: with `set -e` active, a plain failing
    # command would abort the script before the status could be recorded and
    # reported below.
    CLIENT_EXIT_CODE=0
    python -m codecritic.cli.test_orm \
           --model "${ftmodel}" \
           --testset "${testset}" \
           --output "${evalresults}" || CLIENT_EXIT_CODE=$?
    echo "Client finished with exit code: ${CLIENT_EXIT_CODE}"
    # Hand the failure on to the caller so the script still fails when the client does.
    return "${CLIENT_EXIT_CODE}"
}

#######################################
# Stop the background server started by start_server and reap it.
# Safe to call when the server has already exited or was never started.
# Globals:
#   SERVER_PID (read) - PID of the background server process
# Outputs:
#   Progress messages on stdout; a note on stderr when no PID is recorded.
# Returns:
#   0 always; the server's own exit status is deliberately not propagated.
#######################################
stop_server() {
    echo "Stopping server..."
    if [ -z "${SERVER_PID:-}" ]; then
        echo "No server PID recorded; nothing to stop." >&2
        return 0
    fi
    # SIGTERM rather than SIGINT: without job control (the normal case for a
    # script), bash starts background commands with SIGINT ignored, so a SIGINT
    # may never reach a process that has not installed its own handler and the
    # `wait` below would then block indefinitely.
    # `|| true`: the server may already be gone; that must not trip `set -e`.
    kill -TERM "${SERVER_PID}" 2>/dev/null || true
    # `wait` returns the server's exit status, which is non-zero after a signal
    # (or 127 if the PID is no longer a child); neither should abort the script.
    wait "${SERVER_PID}" 2>/dev/null || true
    echo "Server stopped."
}

# Evaluation phase: bring the server up, run the client against it, shut it down.
start_server
# From here on a background server exists. Make sure it is stopped on every
# exit path -- in particular when `set -e` aborts the script because the client
# failed -- so no server process is left behind.
# `|| true` keeps a failure while stopping from replacing the original exit status.
trap 'stop_server || true' EXIT
# Give the server time to initialize. This is a fixed delay, not a readiness
# check: the client is started after 60 seconds whether or not the server is up.
sleep 60
start_client
# Normal path: drop the safety net and stop the server explicitly, once.
trap - EXIT
stop_server
echo "Execution complete."
