Commit 96b68353 by Your Name

push result

parent 2c7b8a6b
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#SBATCH --gres=gpu:8 # Request M GPU per node #SBATCH --gres=gpu:8 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity #SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type #SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40" #SBATCH --constraint="L40"|"L40S"
### ###
### The system will alloc 8 or 16 cores per gpu by default. ### The system will alloc 8 or 16 cores per gpu by default.
...@@ -80,10 +80,18 @@ ulimit -u 20000 ...@@ -80,10 +80,18 @@ ulimit -u 20000
cd /nfs_global/S/shiwenxuan/prm/openr cd /nfs_global/S/shiwenxuan/prm/openr
export PYTHONPATH=$(pwd) export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct # bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct
sleep 100s # sleep 100s
bash scripts/eval/beam_search.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct # bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct --num_sequence 1 --tree_max_width 4
# Sweep the --acc setting from 0.3 to 1.0 in steps of 0.1. For every value the
# loop runs the service-creation script, pauses, runs the beam-search
# evaluation with the same --acc, and then kills the "FastChat" tmux session
# before moving on to the next value.
#
# The values are spelled out literally instead of being generated with
# `seq 0.3 0.1 1.0`: seq's output depends on floating-point stepping and on the
# locale's decimal separator, while a literal list always yields exactly these
# eight strings.
for acc in 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0; do
  # Progress marker (message reads "current loop, ACC=<value>").
  echo "当前循环,ACC=${acc}"
  bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc "${acc}" --policy_model_name Qwen2.5-Math-1.5B-Instruct
  # Fixed delay before evaluating — presumably gives the service time to come
  # up; NOTE(review): confirm 100s is enough for the models being loaded.
  sleep 100s
  bash scripts/eval/beam_search.sh --acc "${acc}" --policy_model_name Qwen2.5-Math-1.5B-Instruct --num_sequence 2 --tree_max_width 8
  # Tear down the tmux session named FastChat so the next iteration starts
  # from a clean state (presumably the session created by the service script).
  tmux kill-session -t FastChat
done
# sleep 6h # sleep 6h
#- End #- End
......
...@@ -431,9 +431,9 @@ class SearchTree: ...@@ -431,9 +431,9 @@ class SearchTree:
self.root = root self.root = root
simulate_env_copy = simulate_env.copy() simulate_env_copy = simulate_env.copy()
print(simulate_env_copy.config["max_actions"]) # print(simulate_env_copy.config["max_actions"])
simulate_env_copy.config["max_actions"] = int(simulate_env_copy.config["max_actions"] / beam_size) simulate_env_copy.config["max_actions"] = int(simulate_env_copy.config["max_actions"] / beam_size)
print(simulate_env_copy.config["max_actions"]) # print(simulate_env_copy.config["max_actions"])
# end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env.copy())] # end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env.copy())]
end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env_copy)] end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env_copy)]
k = beam_size k = beam_size
......
...@@ -4,10 +4,15 @@ echo "RAY TEMP DIR is $RAY_TEMP_DIR" ...@@ -4,10 +4,15 @@ echo "RAY TEMP DIR is $RAY_TEMP_DIR"
POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
ACC=0.5 ACC=0.5
NUM_SEQUENCE=1
TREE_MAX_WIDTH=4
while [[ "$#" -gt 0 ]]; do while [[ "$#" -gt 0 ]]; do
case $1 in case $1 in
--acc) ACC="$2"; shift ;; --acc) ACC="$2"; shift ;;
--policy_model_name) POLICY_MODEL_NAME="$2"; shift ;; --policy_model_name) POLICY_MODEL_NAME="$2"; shift ;;
--num_sequence) NUM_SEQUENCE="$2"; shift ;;
--tree_max_width) TREE_MAX_WIDTH="$2"; shift ;;
*) echo "Unknown parameter passed: $1"; exit 1 ;; *) echo "Unknown parameter passed: $1"; exit 1 ;;
esac esac
shift shift
...@@ -15,7 +20,7 @@ done ...@@ -15,7 +20,7 @@ done
VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
SAVE_DIR="/nfs_global/S/shiwenxuan/prm/openr/results/${POLICY_MODEL_NAME}/${ACC}" SAVE_DIR="/nfs_global/S/shiwenxuan/prm/openr/results/${POLICY_MODEL_NAME}/${NUM_SEQUENCE}/${TREE_MAX_WIDTH}/${ACC}"
echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME" echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME"
echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME" echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME"
...@@ -26,8 +31,8 @@ python reason/evaluation/evaluate.py \ ...@@ -26,8 +31,8 @@ python reason/evaluation/evaluate.py \
--task_name MATH \ --task_name MATH \
--temperature 0.7 \ --temperature 0.7 \
--max_new_tokens 2048 \ --max_new_tokens 2048 \
--num_sequence 2 \ --num_sequence $NUM_SEQUENCE \
--tree_max_width 4 \ --tree_max_width $TREE_MAX_WIDTH \
--tree_max_depth 50 \ --tree_max_depth 50 \
--save_dir $SAVE_DIR \ --save_dir $SAVE_DIR \
--method beam_search \ --method beam_search \
......
### experiment ### experiment
- policy model: Qwen2.5-Math-1.5B-Instruct - policy model: Qwen2.5-Math-1.5B-Instruct
- reward model: Qwen2.5-Math-1.5B-Instruct - reward model: Qwen2.5-Math-1.5B-Instruct
- method: beam search - method: beam search (num_seq=2 tree_width=4)
- - ACC:1.0 result:[{"majority_vote": 0.828, "total_completion_tokens": 2544.756}] - task: Math500
- - ACC:0.9 result:[{"majority_vote": 0.798, "total_completion_tokens": 2576.35}] - - ACC:0.3 [{"majority_vote": 0.708, "prm_min_max": 0.71, "prm_min_vote": 0.71, "prm_last_max": 0.708, "prm_last_vote": 0.708, "total_completion_tokens": 2196.464}]
- - ACC:0.8 result:[{"majority_vote": 0.794, "total_completion_tokens": 2497.672}] - - ACC:0.4 [{"majority_vote": 0.726, "prm_min_max": 0.726, "prm_min_vote": 0.726, "prm_last_max": 0.722, "prm_last_vote": 0.722, "total_completion_tokens": 2175.506}]
- - ACC:0.7 result:[{"majority_vote": 0.782, "total_completion_tokens": 2502.832}] - - ACC:0.5 [{"majority_vote": 0.738, "prm_min_max": 0.738, "prm_min_vote": 0.738, "prm_last_max": 0.742, "prm_last_vote": 0.742, "total_completion_tokens": 2234.252}]
- - ACC:0.6 result:[{"majority_vote": 0.76, "total_completion_tokens": 2491.27}] - - ACC:0.6 [{"majority_vote": 0.746, "prm_min_max": 0.746, "prm_min_vote": 0.746, "prm_last_max": 0.752, "prm_last_vote": 0.752, "total_completion_tokens": 2178.612}]
- - ACC:0.5 result:[{'majority_vote': 0.724, 'total_completion_tokens': 2400.16}] - - ACC:0.7 [{"majority_vote": 0.774, "prm_min_max": 0.776, "prm_min_vote": 0.776, "prm_last_max": 0.78, "prm_last_vote": 0.78, "total_completion_tokens": 2251.602}]
- - ACC:0.4 result:[{'majority_vote': 0.75, 'total_completion_tokens': 2418.876}] - - ACC:0.8 [{"majority_vote": 0.804, "prm_min_max": 0.804, "prm_min_vote": 0.804, "prm_last_max": 0.812, "prm_last_vote": 0.812, "total_completion_tokens": 2251.908}]
- - ACC:0.3 result:[{'majority_vote': 0.748, 'total_completion_tokens': 2463.39}] - - ACC:0.9 [{"majority_vote": 0.838, "prm_min_max": 0.84, "prm_min_vote": 0.84, "prm_last_max": 0.852, "prm_last_vote": 0.852, "total_completion_tokens": 2265.382}]
- - ACC:1.0 [{"majority_vote": 0.838, "prm_min_max": 0.838, "prm_min_vote": 0.838, "prm_last_max": 0.838, "prm_last_vote": 0.838, "total_completion_tokens": 2276.126}]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment