Commit 00cffb98 by Shi wenxuan

Merge branch 'swx' into 'master'

Swx

See merge request !1
parents ca6feac2 2c7b8a6b
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
### beam size 关键路径 (critical code path for beam size)
prm/openr/reason/evaluation/evaluate.py line 201
prm/openr/reason/evaluation/methods.py line 122
prm/openr/reason/guided_search/tree.py line 434
prm/openr/reason/guided_search/tree.py line 449
prm/openr/reason/guided_search/tree.py line 461
\ No newline at end of file
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -218,7 +218,7 @@ class CoTEnv(BaseEnv):
processed_act = self.post_process_act(texts[i])
if (
len(processed_act) > 0
and processed_act not in text_list
# and processed_act not in text_list
# only stop is valid, otherwise the output action is truncated actually
and result.finish_reason[i] == "stop"
):
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#!/bin/bash
#
# SLURM batch script: start the vLLM gold-PRM LLM service, then run the
# beam-search evaluation for Qwen2.5-Math-7B-Instruct at --acc 1.0.
# Output/error files for resubmitted runs go under ret_one/.
#
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e inference.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw # Submit to 'nv-gpu' Partitiion
#SBATCH -t 1-06:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:8 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40S"
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
# FIX: the ret_one directory setup used to sit above the #SBATCH block.
# sbatch stops scanning for #SBATCH directives at the first non-comment
# command, so every directive above (partition, walltime, GPUs, QOS) was
# being silently ignored. Executable statements must come after the
# directive block.
if [ ! -d "ret_one" ]; then
  mkdir -p "ret_one"
fi
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments (module system + conda)
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Important setting!!!
## otherwise it will cause an error of insufficient RDMA resources:
ulimit -l unlimited
## otherwise it will result in an insufficient virtual memory size error, especially when loading LLM:
ulimit -v unlimited
ulimit -n 65535
ulimit -u 4125556
#- Job step
# sleep 30h
cd /nfs_global/S/zhangxiaoyun/prm/openr
export PYTHONPATH=$(pwd)
# Start the policy/PRM serving stack, give it time to come up, then evaluate.
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
sleep 100s
bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
# NOTE(review): this submits infer.slurm again at the end of the job. If this
# file IS infer.slurm, it resubmits itself indefinitely — confirm that is the
# intended self-requeue behavior, otherwise move this line to a separate
# submit wrapper.
sbatch --job-name=test -o "ret_one/%j.out" -e "ret_one/%j.err" infer.slurm
\ No newline at end of file
#!/bin/bash
#
# SLURM batch script: start the vLLM gold-PRM LLM service, then run the
# beam-search evaluation for Qwen2.5-Math-1.5B-Instruct at --acc 0.3.
#
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e inference.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw # Submit to 'nv-gpu' Partitiion
#SBATCH -t 1-06:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:8 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="L40"
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
#  (module system + conda env for the open reasoner stack)
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Important setting!!!
## otherwise it will cause an error of insufficient RDMA resources:
ulimit -l unlimited
## otherwise it will result in an insufficient virtual memory size error, especially when loading LLM:
ulimit -v unlimited
ulimit -n 65535
ulimit -u 20000
#- Job step
# sleep 30h
cd /nfs_global/S/shiwenxuan/prm/openr
export PYTHONPATH=$(pwd)
# Start the policy/PRM serving stack, wait for it to come up, then evaluate.
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
sleep 100s
bash scripts/eval/beam_search.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
# sleep 6h
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -11,7 +11,7 @@ lock = multiprocessing.Lock()
print(id(lock), os.getpid())
import redis
redis_client = redis.Redis(host='127.0.0.1', port=20001, db=0)
redis_client = redis.Redis(host='127.0.0.1', port=20002, db=0)
def set_shared_value(key, value):
redis_client.set(key, value)
......@@ -71,6 +71,8 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
pattern = '<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
match = re.search(pattern, input_str, re.DOTALL)
if match is None:
print(input_str)
assert match is not None, f"No match found for pattern: {pattern}"
question = match.group(1)
solution = question_item_map[question]["solution"]
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
2024-09-28 13:31:28 | INFO | controller | args: Namespace(host='0.0.0.0', port=28777, dispatch_method='shortest_queue', ssl=False)
2024-09-28 13:31:28 | ERROR | stderr | INFO: Started server process [821247]
2024-09-28 13:31:28 | ERROR | stderr | INFO: Waiting for application startup.
2024-09-28 13:31:28 | ERROR | stderr | INFO: Application startup complete.
2024-09-28 13:31:28 | ERROR | stderr | INFO: Uvicorn running on http://0.0.0.0:28777 (Press CTRL+C to quit)
2024-09-28 13:31:56 | INFO | controller | Register a new worker: http://0.0.0.0:40010
2024-09-28 13:31:56 | INFO | controller | Register done: http://0.0.0.0:40010, {'model_names': ['math-shepherd-mistral-7b-prm'], 'speed': 1, 'queue_length': 0}
2024-09-28 13:31:56 | INFO | stdout | INFO: 127.0.0.1:34020 - "POST /register_worker HTTP/1.1" 200 OK
2024-09-28 13:32:20 | INFO | controller | Register a new worker: http://0.0.0.0:30010
2024-09-28 13:32:20 | INFO | controller | Register done: http://0.0.0.0:30010, {'model_names': ['mistral-7b-sft'], 'speed': 1, 'queue_length': 0}
2024-09-28 13:32:20 | INFO | stdout | INFO: 127.0.0.1:53512 - "POST /register_worker HTTP/1.1" 200 OK
2024-09-28 13:32:41 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:32:41 | INFO | stdout | INFO: 127.0.0.1:50970 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:05 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:33:05 | INFO | stdout | INFO: 127.0.0.1:48436 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:26 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:33:26 | INFO | stdout | INFO: 127.0.0.1:35488 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:33:50 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:33:50 | INFO | stdout | INFO: 127.0.0.1:46292 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:34:03 | INFO | controller | names: ['http://0.0.0.0:30010'], queue_lens: [0.0], ret: http://0.0.0.0:30010
2024-09-28 13:34:03 | INFO | stdout | INFO: 127.0.0.1:41020 - "POST /get_worker_address HTTP/1.1" 200 OK
2024-09-28 13:34:05 | INFO | controller | names: ['http://0.0.0.0:40010'], queue_lens: [0.0], ret: http://0.0.0.0:40010
2024-09-28 13:34:05 | INFO | stdout | INFO: 127.0.0.1:41034 - "POST /get_worker_address HTTP/1.1" 200 OK
2024-09-28 13:34:11 | INFO | controller | Receive heart beat. http://0.0.0.0:40010
2024-09-28 13:34:11 | INFO | stdout | INFO: 127.0.0.1:41044 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2024-09-28 13:34:35 | INFO | controller | Receive heart beat. http://0.0.0.0:30010
2024-09-28 13:34:35 | INFO | stdout | INFO: 127.0.0.1:44238 - "POST /receive_heart_beat HTTP/1.1" 200 OK
......@@ -78,7 +78,8 @@ if __name__ == "__main__":
else:
# assume qwen
prm_step_tag = "\n\n\n\n\n "
prm_format_str = "{question} {answer}"
# prm_format_str = "{question} {answer}"
prm_format_str = "<|im_start|>user\n{question}<|im_end|> {answer}"
if "qwen" in config.LM.lower():
lm_step_tag = "\n\n"
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -430,14 +430,22 @@ class SearchTree:
self._expand_leaf_node(root, simulate_env, reward_model_fn)
self.root = root
end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env.copy())]
simulate_env_copy = simulate_env.copy()
print(simulate_env_copy.config["max_actions"])
simulate_env_copy.config["max_actions"] = int(simulate_env_copy.config["max_actions"] / beam_size)
print(simulate_env_copy.config["max_actions"])
# end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env.copy())]
end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env_copy)]
k = beam_size
print("k: ", k)
for _ in range(max_step + 1):
cur_nodes_to_search = top_k_nodes
top_k_nodes = []
for cur_neg_v, cur_node, cur_env in cur_nodes_to_search:
print("cur_node.children_num: ", len(cur_node.children))
if cur_node.terminated:
print("signal for k-1")
end_nodes.append((cur_neg_v, cur_node, cur_env))
k -= 1
elif k > 0:
......@@ -454,6 +462,7 @@ class SearchTree:
key=lambda x: x[2],
reverse=True,
)[:k]
print("top_k_children_num: ", len(top_k_children))
for c_act, c_node, c_value in top_k_children:
new_env = cur_env.copy()
heapq.heappush(top_k_nodes, (-c_value, c_node, new_env))
......@@ -679,7 +688,7 @@ class SearchTree:
prms = reward_fn(
[
(
f'<|im_start|>user\n{simulate_env.question}<|im_end|>',
simulate_env.question,
simulate_env.answer + x["action"],
)
for x in simulate_env.legal_actions
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -6,11 +6,11 @@ echo "RAY TEMP DIR is $RAY_TEMP_DIR"
HOST_ADDR=0.0.0.0
CONTROLER_PORT=28777
WORKER_BASE_PORT=30010
ACC=1.0
ACC=0.5
MODEL_BASE=/share/collab/codemodel/models
CUDA_DEVICE_BASE=0
POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
while [[ "$#" -gt 0 ]]; do
case $1 in
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
export RAY_TEMP_DIR="/tmp/ray_$SLURM_JOBID"
echo "RAY TEMP DIR is $RAY_TEMP_DIR"
POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
ACC=1.0
POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
ACC=0.5
while [[ "$#" -gt 0 ]]; do
case $1 in
......@@ -15,7 +15,7 @@ done
VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
SAVE_DIR="results/${POLICY_MODEL_NAME}/${ACC}"
SAVE_DIR="/nfs_global/S/shiwenxuan/prm/openr/results/${POLICY_MODEL_NAME}/${ACC}"
echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME"
echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME"
......@@ -26,7 +26,7 @@ python reason/evaluation/evaluate.py \
--task_name MATH \
--temperature 0.7 \
--max_new_tokens 2048 \
--num_sequence 1 \
--num_sequence 2 \
--tree_max_width 4 \
--tree_max_depth 50 \
--save_dir $SAVE_DIR \
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
### experiment
- policy model: Qwen2.5-Math-1.5B-Instruct
- reward model: Qwen2.5-Math-1.5B-Instruct
- method: beam search
- results by gold-PRM accuracy (ACC):
  - ACC:1.0 result:[{"majority_vote": 0.828, "total_completion_tokens": 2544.756}]
  - ACC:0.9 result:[{"majority_vote": 0.798, "total_completion_tokens": 2576.35}]
  - ACC:0.8 result:[{"majority_vote": 0.794, "total_completion_tokens": 2497.672}]
  - ACC:0.7 result:[{"majority_vote": 0.782, "total_completion_tokens": 2502.832}]
  - ACC:0.6 result:[{"majority_vote": 0.76, "total_completion_tokens": 2491.27}]
  - ACC:0.5 result:[{'majority_vote': 0.724, 'total_completion_tokens': 2400.16}]
  - ACC:0.4 result:[{'majority_vote': 0.75, 'total_completion_tokens': 2418.876}]
  - ACC:0.3 result:[{'majority_vote': 0.748, 'total_completion_tokens': 2463.39}]
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment