Commit ad1b62bf by Your Name

msg for beam-size

parent ca6feac2
### beam size critical path
- prm/openr/reason/evaluation/evaluate.py line 201
- prm/openr/reason/evaluation/methods.py line 122
- prm/openr/reason/guided_search/tree.py line 434
- prm/openr/reason/guided_search/tree.py line 449
- prm/openr/reason/guided_search/tree.py line 461
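To inspect these spots without opening each file, a quick search over the listed paths works (illustrative only; the identifier `beam` is an assumption about the naming used in those files):

```bash
# Locate beam-size handling at the paths listed above.
grep -n -i "beam" \
    prm/openr/reason/evaluation/evaluate.py \
    prm/openr/reason/evaluation/methods.py \
    prm/openr/reason/guided_search/tree.py
```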
#!/bin/bash
# Create the output directory for the job logs if it does not already exist.
if [ ! -d "ret_one" ]; then
    mkdir -p "ret_one"
fi
#- Job parameters
sbatch --job-name=test -o "ret_one/%j.out" -e "ret_one/%j.err" infer.slurm
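Once `sbatch` prints the job ID, the run can be followed with standard Slurm tooling (a usage sketch; replace `<jobid>` with the printed ID):

```bash
squeue -u "$USER"            # queue state of your jobs
tail -f ret_one/<jobid>.out  # follow the job's standard output
tail -f ret_one/<jobid>.err  # follow the job's standard error
```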
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out # Write the standard output to the file 'inference.out'
#SBATCH -e inference.err # Write the standard error to the file 'inference.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw # Submit to the 'r8nv-gpu-hw' partition
#SBATCH -t 1-06:00:00 # Run for a maximum time of 1 day, 6 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request 1 node
#SBATCH --gres=gpu:8 # Request 8 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40S"
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Important settings!!!
## Otherwise the job can fail with an 'insufficient RDMA resources' error:
ulimit -l unlimited
## Otherwise it can fail with an insufficient virtual memory error, especially when loading an LLM:
ulimit -v unlimited
ulimit -n 65535
ulimit -u 4125556
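## Optional sanity check (illustrative addition, not part of the original script):
## print all effective limits so the job log records whether the settings above took effect.
ulimit -a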
#- Job step
# sleep 30h
cd /nfs_global/S/zhangxiaoyun/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
sleep 100s
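## Illustrative alternative to the fixed sleep above (not part of the original script):
## poll the controller port (28777 in this setup, per the service script and log below)
## until it accepts TCP connections, instead of guessing the start-up time.
# until (echo > /dev/tcp/127.0.0.1/28777) 2>/dev/null; do sleep 5; done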
bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out # Write the standard output to the file 'inference.out'
#SBATCH -e inference.err # Write the standard error to the file 'inference.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw # Submit to the 'r8nv-gpu-hw' partition
#SBATCH -t 1-06:00:00 # Run for a maximum time of 1 day, 6 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request 1 node
#SBATCH --gres=gpu:8 # Request 8 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40S"
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Important settings!!!
## Otherwise the job can fail with an 'insufficient RDMA resources' error:
ulimit -l unlimited
## Otherwise it can fail with an insufficient virtual memory error, especially when loading an LLM:
ulimit -v unlimited
ulimit -n 65535
ulimit -u 20000
#- Job step
# sleep 30h
cd /nfs_global/S/shiwenxuan/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
sleep 100s
bash scripts/eval/beam_search.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
# sleep 6h
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
@@ -11,7 +11,7 @@ lock = multiprocessing.Lock()
 print(id(lock), os.getpid())
 import redis
-redis_client = redis.Redis(host='127.0.0.1', port=20001, db=0)
+redis_client = redis.Redis(host='127.0.0.1', port=20002, db=0)
 def set_shared_value(key, value):
     redis_client.set(key, value)
...
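The hunk above only moves the shared-state Redis server from port 20001 to 20002. A quick way to confirm a server is reachable on the new port (a usage sketch; `some_key` is a placeholder, not a key used by the repo):

```bash
redis-cli -h 127.0.0.1 -p 20002 ping          # expect: PONG
redis-cli -h 127.0.0.1 -p 20002 get some_key  # inspect a value written via set_shared_value
```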
2024-09-28 13:31:28 | INFO | controller | args: Namespace(host='0.0.0.0', port=28777, dispatch_method='shortest_queue', ssl=False)
2024-09-28 13:31:28 | ERROR | stderr | INFO: Started server process [821247]
2024-09-28 13:31:28 | ERROR | stderr | INFO: Waiting for application startup.
2024-09-28 13:31:28 | ERROR | stderr | INFO: Application startup complete.
2024-09-28 13:31:28 | ERROR | stderr | INFO: Uvicorn running on http://0.0.0.0:28777 (Press CTRL+C to quit)
2024-09-28 13:31:56 | INFO | controller | Register a new worker: http://0.0.0.0:40010
2024-09-28 13:31:56 | INFO | controller | Register done: http://0.0.0.0:40010, {'model_names': ['math-shepherd-mistral-7b-prm'], 'speed': 1, 'queue_length': 0}
2024-09-28 13:31:56 | INFO | stdout | INFO: 127.0.0.1:34020 - "POST /register_worker HTTP/1.1" 200 OK
2024-09-28 13:32:20 | INFO | controller | Register a new worker: http://0.0.0.0:30010
2024-09-28 13:32:20 | INFO | controller | Register done: http://0.0.0.0:30010, {'model_names': ['mistral-7b-sft'], 'speed': 1, 'queue_length': 0}
2024-09-28 13:32:20 | INFO | stdout | INFO: 127.0.0.1:53512 - "POST /register_worker HTTP/1.1" 200 OK
2024-09-28 13:32:41 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:32:41 | INFO | stdout | INFO: 127.0.0.1:50970 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:05 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:33:05 | INFO | stdout | INFO: 127.0.0.1:48436 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:26 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:33:26 | INFO | stdout | INFO: 127.0.0.1:35488 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:50 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:33:50 | INFO | stdout | INFO: 127.0.0.1:46292 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:34:03 | INFO | controller | names: ['http://0.0.0.0:30010'], queue_lens: [0.0], ret: http://0.0.0.0:30010
2024-09-28 13:34:03 | INFO | stdout | INFO: 127.0.0.1:41020 - "POST /get_worker_address HTTP/1.1" 200 OK
2024-09-28 13:34:05 | INFO | controller | names: ['http://0.0.0.0:40010'], queue_lens: [0.0], ret: http://0.0.0.0:40010
2024-09-28 13:34:05 | INFO | stdout | INFO: 127.0.0.1:41034 - "POST /get_worker_address HTTP/1.1" 200 OK
2024-09-28 13:34:11 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:34:11 | INFO | stdout | INFO: 127.0.0.1:41044 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:34:35 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:34:35 | INFO | stdout | INFO: 127.0.0.1:44238 - "POST /receive_heart_beat HTTP/1.1" 200 OK
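The log shows the controller registering two workers and answering `/get_worker_address` with shortest-queue dispatch. The same endpoint can be exercised by hand (a sketch; only the URL, method, and model name are taken from the log above, the JSON payload shape is an assumption):

```bash
curl -s -X POST http://127.0.0.1:28777/get_worker_address \
     -H "Content-Type: application/json" \
     -d '{"model": "mistral-7b-sft"}'
```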
@@ -6,11 +6,11 @@ echo "RAY TEMP DIR is $RAY_TEMP_DIR"
 HOST_ADDR=0.0.0.0
 CONTROLER_PORT=28777
 WORKER_BASE_PORT=30010
-ACC=1.0
+ACC=0.5
 MODEL_BASE=/share/collab/codemodel/models
 CUDA_DEVICE_BASE=0
-POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
+POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
 while [[ "$#" -gt 0 ]]; do
     case $1 in
...
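The truncated `while`/`case` hunk is the scripts' flag parser; the job scripts above invoke it with `--acc` and `--policy_model_name`. A minimal sketch of that pattern (illustrative only; the elided option handling in the real script may differ):

```bash
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --acc) ACC="$2"; shift ;;
        --policy_model_name) POLICY_MODEL_NAME="$2"; shift ;;
        *) echo "Unknown parameter: $1"; exit 1 ;;
    esac
    shift
done
```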
 export RAY_TEMP_DIR="/tmp/ray_$SLURM_JOBID"
 echo "RAY TEMP DIR is $RAY_TEMP_DIR"
-POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
-ACC=1.0
+POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
+ACC=0.5
 while [[ "$#" -gt 0 ]]; do
     case $1 in
@@ -15,7 +15,7 @@ done
 VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
-SAVE_DIR="results/${POLICY_MODEL_NAME}/${ACC}"
+SAVE_DIR="/nfs_global/S/shiwenxuan/prm/openr/results/${POLICY_MODEL_NAME}/${ACC}"
 echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME"
 echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME"
...
### Experiment
- policy model: Qwen2.5-Math-1.5B-Instruct
- reward model: Qwen2.5-Math-1.5B-Instruct
- method: beam search

| ACC | majority_vote | total_completion_tokens |
|-----|---------------|-------------------------|
| 1.0 | 0.828 | 2544.756 |
| 0.9 | 0.798 | 2576.35 |
| 0.8 | 0.794 | 2497.672 |
| 0.7 | 0.782 | 2502.832 |
| 0.6 | 0.76 | 2491.27 |
| 0.5 | 0.724 | 2400.16 |
| 0.4 | 0.75 | 2418.876 |
| 0.3 | 0.748 | 2463.39 |