Commit ad1b62bf by Your Name

msg for beam-size

parent ca6feac2
### beam size critical path
- prm/openr/reason/evaluation/evaluate.py line 201
- prm/openr/reason/evaluation/methods.py line 122
- prm/openr/reason/guided_search/tree.py line 434
- prm/openr/reason/guided_search/tree.py line 449
- prm/openr/reason/guided_search/tree.py line 461
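To inspect these spots without opening each file, a quick search over the listed paths works (illustrative only; the identifier `beam` is an assumption about the naming used in those files):

```bash
# Locate beam-size handling at the paths listed above.
grep -n -i "beam" \
    prm/openr/reason/evaluation/evaluate.py \
    prm/openr/reason/evaluation/methods.py \
    prm/openr/reason/guided_search/tree.py
```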
#!/bin/bash
# Create the output directory for the job logs if it does not already exist.
if [ ! -d "ret_one" ]; then
    mkdir -p "ret_one"
fi
#- Job parameters
sbatch --job-name=test -o "ret_one/%j.out" -e "ret_one/%j.err" infer.slurm
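Once `sbatch` prints the job ID, the run can be followed with standard Slurm tooling (a usage sketch; replace `<jobid>` with the printed ID):

```bash
squeue -u "$USER"            # queue state of your jobs
tail -f ret_one/<jobid>.out  # follow the job's standard output
tail -f ret_one/<jobid>.err  # follow the job's standard error
```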
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out # Write the standard output to the file 'inference.out'
#SBATCH -e inference.err # Write the standard error to the file 'inference.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw # Submit to the 'r8nv-gpu-hw' partition
#SBATCH -t 1-06:00:00 # Run for a maximum time of 1 day, 6 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request 1 node
#SBATCH --gres=gpu:8 # Request 8 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40S"
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Important settings!!!
## Otherwise the job can fail with an 'insufficient RDMA resources' error:
ulimit -l unlimited
## Otherwise it can fail with an insufficient virtual memory error, especially when loading an LLM:
ulimit -v unlimited
ulimit -n 65535
ulimit -u 4125556
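## Optional sanity check (illustrative addition, not part of the original script):
## print all effective limits so the job log records whether the settings above took effect.
ulimit -a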
#- Job step
# sleep 30h
cd /nfs_global/S/zhangxiaoyun/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
sleep 100s
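## Illustrative alternative to the fixed sleep above (not part of the original script):
## poll the controller port (28777 in this setup, per the service script and log below)
## until it accepts TCP connections, instead of guessing the start-up time.
# until (echo > /dev/tcp/127.0.0.1/28777) 2>/dev/null; do sleep 5; done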
bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out # Write the standard output to the file 'inference.out'
#SBATCH -e inference.err # Write the standard error to the file 'inference.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw # Submit to the 'r8nv-gpu-hw' partition
#SBATCH -t 1-06:00:00 # Run for a maximum time of 1 day, 6 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request 1 node
#SBATCH --gres=gpu:8 # Request 8 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40S"
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Important settings!!!
## Otherwise the job can fail with an 'insufficient RDMA resources' error:
ulimit -l unlimited
## Otherwise it can fail with an insufficient virtual memory error, especially when loading an LLM:
ulimit -v unlimited
ulimit -n 65535
ulimit -u 20000
#- Job step
# sleep 30h
cd /nfs_global/S/shiwenxuan/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
sleep 100s
bash scripts/eval/beam_search.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
# sleep 6h
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
@@ -11,7 +11,7 @@ lock = multiprocessing.Lock()
 print(id(lock), os.getpid())
 import redis
-redis_client = redis.Redis(host='127.0.0.1', port=20001, db=0)
+redis_client = redis.Redis(host='127.0.0.1', port=20002, db=0)
 def set_shared_value(key, value):
     redis_client.set(key, value)
...
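The hunk above only moves the shared-state Redis server from port 20001 to 20002. A quick way to confirm a server is reachable on the new port (a usage sketch; `some_key` is a placeholder, not a key used by the repo):

```bash
redis-cli -h 127.0.0.1 -p 20002 ping          # expect: PONG
redis-cli -h 127.0.0.1 -p 20002 get some_key  # inspect a value written via set_shared_value
```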
2024-09-28 13:31:28 | INFO | controller | args: Namespace(host='0.0.0.0', port=28777, dispatch_method='shortest_queue', ssl=False)
2024-09-28 13:31:28 | ERROR | stderr | INFO: Started server process [821247]
2024-09-28 13:31:28 | ERROR | stderr | INFO: Waiting for application startup.
2024-09-28 13:31:28 | ERROR | stderr | INFO: Application startup complete.
2024-09-28 13:31:28 | ERROR | stderr | INFO: Uvicorn running on http://0.0.0.0:28777 (Press CTRL+C to quit)
2024-09-28 13:31:56 | INFO | controller | Register a new worker: http://0.0.0.0:40010
2024-09-28 13:31:56 | INFO | controller | Register done: http://0.0.0.0:40010, {'model_names': ['math-shepherd-mistral-7b-prm'], 'speed': 1, 'queue_length': 0}
2024-09-28 13:31:56 | INFO | stdout | INFO: 127.0.0.1:34020 - "POST /register_worker HTTP/1.1" 200 OK
2024-09-28 13:32:20 | INFO | controller | Register a new worker: http://0.0.0.0:30010
2024-09-28 13:32:20 | INFO | controller | Register done: http://0.0.0.0:30010, {'model_names': ['mistral-7b-sft'], 'speed': 1, 'queue_length': 0}
2024-09-28 13:32:20 | INFO | stdout | INFO: 127.0.0.1:53512 - "POST /register_worker HTTP/1.1" 200 OK
2024-09-28 13:32:41 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:32:41 | INFO | stdout | INFO: 127.0.0.1:50970 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:05 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:33:05 | INFO | stdout | INFO: 127.0.0.1:48436 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:26 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:33:26 | INFO | stdout | INFO: 127.0.0.1:35488 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:50 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:33:50 | INFO | stdout | INFO: 127.0.0.1:46292 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:34:03 | INFO | controller | names: ['http://0.0.0.0:30010'], queue_lens: [0.0], ret: http://0.0.0.0:30010
2024-09-28 13:34:03 | INFO | stdout | INFO: 127.0.0.1:41020 - "POST /get_worker_address HTTP/1.1" 200 OK
2024-09-28 13:34:05 | INFO | controller | names: ['http://0.0.0.0:40010'], queue_lens: [0.0], ret: http://0.0.0.0:40010
2024-09-28 13:34:05 | INFO | stdout | INFO: 127.0.0.1:41034 - "POST /get_worker_address HTTP/1.1" 200 OK
2024-09-28 13:34:11 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:34:11 | INFO | stdout | INFO: 127.0.0.1:41044 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:34:35 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:34:35 | INFO | stdout | INFO: 127.0.0.1:44238 - "POST /receive_heart_beat HTTP/1.1" 200 OK
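The log shows the controller registering two workers and answering `/get_worker_address` with shortest-queue dispatch. The same endpoint can be exercised by hand (a sketch; only the URL, method, and model name are taken from the log above, the JSON payload shape is an assumption):

```bash
curl -s -X POST http://127.0.0.1:28777/get_worker_address \
     -H "Content-Type: application/json" \
     -d '{"model": "mistral-7b-sft"}'
```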
@@ -6,11 +6,11 @@ echo "RAY TEMP DIR is $RAY_TEMP_DIR"
 HOST_ADDR=0.0.0.0
 CONTROLER_PORT=28777
 WORKER_BASE_PORT=30010
-ACC=1.0
+ACC=0.5
 MODEL_BASE=/share/collab/codemodel/models
 CUDA_DEVICE_BASE=0
-POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
+POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
 while [[ "$#" -gt 0 ]]; do
     case $1 in
...
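The truncated `while`/`case` hunk is the scripts' flag parser; the job scripts above invoke it with `--acc` and `--policy_model_name`. A minimal sketch of that pattern (illustrative only; the elided option handling in the real script may differ):

```bash
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --acc) ACC="$2"; shift ;;
        --policy_model_name) POLICY_MODEL_NAME="$2"; shift ;;
        *) echo "Unknown parameter: $1"; exit 1 ;;
    esac
    shift
done
```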
 export RAY_TEMP_DIR="/tmp/ray_$SLURM_JOBID"
 echo "RAY TEMP DIR is $RAY_TEMP_DIR"
-POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
-ACC=1.0
+POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
+ACC=0.5
 while [[ "$#" -gt 0 ]]; do
     case $1 in
@@ -15,7 +15,7 @@ done
 VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
-SAVE_DIR="results/${POLICY_MODEL_NAME}/${ACC}"
+SAVE_DIR="/nfs_global/S/shiwenxuan/prm/openr/results/${POLICY_MODEL_NAME}/${ACC}"
 echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME"
 echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME"
...
### Experiment
- policy model: Qwen2.5-Math-1.5B-Instruct
- reward model: Qwen2.5-Math-1.5B-Instruct
- method: beam search

| ACC | majority_vote | total_completion_tokens |
|-----|---------------|-------------------------|
| 1.0 | 0.828 | 2544.756 |
| 0.9 | 0.798 | 2576.35 |
| 0.8 | 0.794 | 2497.672 |
| 0.7 | 0.782 | 2502.832 |
| 0.6 | 0.76 | 2491.27 |
| 0.5 | 0.724 | 2400.16 |
| 0.4 | 0.75 | 2418.876 |
| 0.3 | 0.748 | 2463.39 |