# Shell options and path configuration shared by the commands in this script.
#
#   -x  print each command before it is executed
#   -e  stop at the first command that exits non-zero
#   -u  treat expansion of an unset variable as an error, so a misspelled
#       variable name aborts the script instead of silently expanding to an
#       empty string and handing an empty path to a tool
set -xeu

# Model location and project root; modelname is the prefix used in every
# dataset file name below.
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"
modelname="qwen25_coder_inst"

# APPS train/test dataset files for this model.
trainset="${project}/data/train/${modelname}-apps-train.jsonl"
testset="${project}/data/test/${modelname}-apps-test.jsonl"

# Output of the pair-selection step.
train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"


# Output of the reformat step (reward format).
reward_ds="${project}/data/train/${modelname}-apps-train-reward_dataset.jsonl"

# export CUDA_VISIBLE_DEVICES=0,1,2,3

## Sampling
## APPS
# python -m codecritic.cli.gen_dataset \
#     --model ${model} \
#     --apps /lustre/S/nanziyuan/datasets/apps/ \
#     --train ${trainset} \
#     --test ${testset}

## HumanEval & MBPP
# evalplus.evaluate \
#     --model ${model} \
#     --n_samples 50 \
#     --temperature 0.8 \
#     --dataset humaneval \
#     --root "${project}/data/test/${modelname}-humaneval" \
#     --backend vllm

# evalplus.evaluate \
#     --model ${model} \
#     --n_samples 50 \
#     --temperature 0.8 \
#     --dataset mbpp \
#     --root "${project}/data/test/${modelname}-mbpp" \
#     --backend vllm

## HumanEvalPack

## BigCodeBench

## Training dataset
# Run codecritic's select_preference_pairs on the training set and write the
# result to ${train_selected_pairs}. The expansions are double-quoted so each
# path reaches the tool as exactly one argument even if it ever contains
# whitespace or glob characters.
python -m codecritic.cli.select_preference_pairs \
       --dataset "${trainset}" \
       --output "${train_selected_pairs}"

# Run codecritic's reformat on the training set plus the selected pairs,
# requesting the "reward" format, and write the result to ${reward_ds}.
# The expansions are double-quoted so each path reaches the tool as exactly
# one argument even if it ever contains whitespace or glob characters.
python -m codecritic.cli.reformat \
       --dataset "${trainset}" \
       --pairs "${train_selected_pairs}" \
       --format reward \
       --output "${reward_ds}"
