Commit 96b68353 by Your Name

push result

parent 2c7b8a6b
......@@ -21,7 +21,7 @@
#SBATCH --gres=gpu:8 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40"
#SBATCH --constraint="L40"|"L40S"
###
### The system will alloc 8 or 16 cores per gpu by default.
......@@ -80,10 +80,18 @@ ulimit -u 20000
cd /nfs_global/S/shiwenxuan/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
sleep 100s
bash scripts/eval/beam_search.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
# bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct
# sleep 100s
# bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct --num_sequence 1 --tree_max_width 4
# Sweep the --acc setting from 0.3 to 1.0 in steps of 0.1. For each value:
# run the service-creation script, wait, run the beam-search evaluation,
# then kill the "FastChat" tmux session before the next iteration.
# The values are listed literally (the same strings `seq 0.3 0.1 1.0` prints)
# so the sweep does not depend on seq's floating-point stepping or on the
# locale's decimal separator.
for acc in 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0; do
  # Log the ACC value of the current iteration.
  echo "当前循环,ACC=${acc}"
  bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc "${acc}" --policy_model_name Qwen2.5-Math-1.5B-Instruct
  # Fixed pause before evaluating — presumably to let the service finish
  # starting; TODO confirm 100s is sufficient.
  sleep 100s
  bash scripts/eval/beam_search.sh --acc "${acc}" --policy_model_name Qwen2.5-Math-1.5B-Instruct --num_sequence 2 --tree_max_width 8
  # Tear down the tmux session named FastChat so the next iteration starts clean.
  tmux kill-session -t FastChat
done
# sleep 6h
#- End
......
......@@ -431,9 +431,9 @@ class SearchTree:
self.root = root
simulate_env_copy = simulate_env.copy()
print(simulate_env_copy.config["max_actions"])
# print(simulate_env_copy.config["max_actions"])
simulate_env_copy.config["max_actions"] = int(simulate_env_copy.config["max_actions"] / beam_size)
print(simulate_env_copy.config["max_actions"])
# print(simulate_env_copy.config["max_actions"])
# end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env.copy())]
end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env_copy)]
k = beam_size
......
......@@ -4,10 +4,15 @@ echo "RAY TEMP DIR is $RAY_TEMP_DIR"
POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
ACC=0.5
NUM_SEQUENCE=1
TREE_MAX_WIDTH=4
while [[ "$#" -gt 0 ]]; do
case $1 in
--acc) ACC="$2"; shift ;;
--policy_model_name) POLICY_MODEL_NAME="$2"; shift ;;
--num_sequence) NUM_SEQUENCE="$2"; shift ;;
--tree_max_width) TREE_MAX_WIDTH="$2"; shift ;;
*) echo "Unknown parameter passed: $1"; exit 1 ;;
esac
shift
......@@ -15,7 +20,7 @@ done
VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
SAVE_DIR="/nfs_global/S/shiwenxuan/prm/openr/results/${POLICY_MODEL_NAME}/${ACC}"
SAVE_DIR="/nfs_global/S/shiwenxuan/prm/openr/results/${POLICY_MODEL_NAME}/${NUM_SEQUENCE}/${TREE_MAX_WIDTH}/${ACC}"
echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME"
echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME"
......@@ -26,8 +31,8 @@ python reason/evaluation/evaluate.py \
--task_name MATH \
--temperature 0.7 \
--max_new_tokens 2048 \
--num_sequence 2 \
--tree_max_width 4 \
--num_sequence $NUM_SEQUENCE \
--tree_max_width $TREE_MAX_WIDTH \
--tree_max_depth 50 \
--save_dir $SAVE_DIR \
--method beam_search \
......
### experiment
- policy model: Qwen2.5-Math-1.5B-Instruct
- reward model: Qwen2.5-Math-1.5B-Instruct
- method: beam search
- - ACC:1.0 result:[{"majority_vote": 0.828, "total_completion_tokens": 2544.756}]
- - ACC:0.9 result:[{"majority_vote": 0.798, "total_completion_tokens": 2576.35}]
- - ACC:0.8 result:[{"majority_vote": 0.794, "total_completion_tokens": 2497.672}]
- - ACC:0.7 result:[{"majority_vote": 0.782, "total_completion_tokens": 2502.832}]
- - ACC:0.6 result:[{"majority_vote": 0.76, "total_completion_tokens": 2491.27}]
- - ACC:0.5 result:[{'majority_vote': 0.724, 'total_completion_tokens': 2400.16}]
- - ACC:0.4 result:[{'majority_vote': 0.75, 'total_completion_tokens': 2418.876}]
- - ACC:0.3 result:[{'majority_vote': 0.748, 'total_completion_tokens': 2463.39}]
- method: beam search (num_seq=2 tree_width=4)
- task: Math500
- - ACC:0.3 [{"majority_vote": 0.708, "prm_min_max": 0.71, "prm_min_vote": 0.71, "prm_last_max": 0.708, "prm_last_vote": 0.708, "total_completion_tokens": 2196.464}]
- - ACC:0.4 [{"majority_vote": 0.726, "prm_min_max": 0.726, "prm_min_vote": 0.726, "prm_last_max": 0.722, "prm_last_vote": 0.722, "total_completion_tokens": 2175.506}]
- - ACC:0.5 [{"majority_vote": 0.738, "prm_min_max": 0.738, "prm_min_vote": 0.738, "prm_last_max": 0.742, "prm_last_vote": 0.742, "total_completion_tokens": 2234.252}]
- - ACC:0.6 [{"majority_vote": 0.746, "prm_min_max": 0.746, "prm_min_vote": 0.746, "prm_last_max": 0.752, "prm_last_vote": 0.752, "total_completion_tokens": 2178.612}]
- - ACC:0.7 [{"majority_vote": 0.774, "prm_min_max": 0.776, "prm_min_vote": 0.776, "prm_last_max": 0.78, "prm_last_vote": 0.78, "total_completion_tokens": 2251.602}]
- - ACC:0.8 [{"majority_vote": 0.804, "prm_min_max": 0.804, "prm_min_vote": 0.804, "prm_last_max": 0.812, "prm_last_vote": 0.812, "total_completion_tokens": 2251.908}]
- - ACC:0.9 [{"majority_vote": 0.838, "prm_min_max": 0.84, "prm_min_vote": 0.84, "prm_last_max": 0.852, "prm_last_vote": 0.852, "total_completion_tokens": 2265.382}]
- - ACC:1.0 [{"majority_vote": 0.838, "prm_min_max": 0.838, "prm_min_vote": 0.838, "prm_last_max": 0.838, "prm_last_vote": 0.838, "total_completion_tokens": 2276.126}]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment