Commit 69ea6d70 by ZhangXiaoyun

speed up!

parent 9bc550dc
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
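For reference, a minimal sketch of reading one of these run-record files back in Python. The actual file paths are not shown in this diff, so `record.json` below is only a placeholder name.

```python
import json

# Hedged sketch: the real record files live under the evaluation output
# directories; "record.json" is a placeholder, not a path from the repo.
with open("record.json", encoding="utf-8") as f:
    record = json.load(f)

gen_config = record["gen_config"]        # n=1, temperature=0.7, max_new_tokens=2048, ...
method_config = record["method_config"]  # beam_search on MATH, beam_size=1, tree_max_width=4
print(record["LM"], record["RM"])        # Qwen2.5-Math-1.5B-Instruct / Qwen2.5-Math-1.5B-Instruct_RM
```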
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out           # Write the standard output to 'inference.out'
#SBATCH -e inference.err           # Write the standard error to 'inference.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw-80g         # Submit to the 'r8nv-gpu-hw-80g' partition
#SBATCH -t 1-06:00:00              # Run for a maximum time of 1 day, 6 hours, 00 mins, 00 secs
#SBATCH --nodes=1                  # Request 1 node
#SBATCH --gres=gpu:8               # Request 8 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="A100_80G"
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Job step
# sleep 30h
cd /nfs_global/S/zhangxiaoyun/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
sleep 100s
bash scripts/eval/beam_search.sh
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 4) slurm-tools/v1.0 7) cuda-cudnn/11.8-8.8.1
2) git/2.31.1 5) cmake/3.21.7
3) python3/3.8.16 6) mpich/3.2.1
Job start at 2025-02-27 13:10:47
Job run at:
Static hostname: localhost.localdomain
Transient hostname: r8a100-d01
Icon name: computer-server
Chassis: server
Machine ID: af6fe29e1ea7413c9518073fffae5e4a
Boot ID: 41d3b695cf27447cb7da3a3bfb840cb5
Operating System: Rocky Linux 8.7 (Green Obsidian)
CPE OS Name: cpe:/o:rocky:rocky:8:GA
Kernel: Linux 4.18.0-425.10.1.el8_7.x86_64
Architecture: x86-64
Have already added /tools/cluster-modulefiles into $MODULEPATH
/usr/bin/gcc
/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python
/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python3
############### /home : /home/S/zhangxiaoyun
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/home 5198M 16384M 20480M 115k 0 0
############### /workspace
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/workspace 78781M 400G 500G 719k 0 0
############### /nfs_global
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/nfs_global 1594G 5120G 7168G 39469 5000k 10000k
############### /lustre
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem used quota limit grace files quota limit grace
/lustre 0k 8T 10T - 0 3000000 36000000 -
uid 6191 is using default block quota setting
uid 6191 is using default file quota setting
Thu Feb 27 13:10:48 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A100 80GB PCIe On | 00000000:35:00.0 Off | 0 |
| N/A 37C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 1 NVIDIA A100 80GB PCIe On | 00000000:36:00.0 Off | 0 |
| N/A 40C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 2 NVIDIA A100 80GB PCIe On | 00000000:39:00.0 Off | 0 |
| N/A 42C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 3 NVIDIA A100 80GB PCIe On | 00000000:3D:00.0 Off | 0 |
| N/A 37C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 4 NVIDIA A100 80GB PCIe On | 00000000:9C:00.0 Off | 0 |
| N/A 37C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 5 NVIDIA A100 80GB PCIe On | 00000000:9D:00.0 Off | 0 |
| N/A 40C P0 58W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 6 NVIDIA A100 80GB PCIe On | 00000000:A0:00.0 Off | 0 |
| N/A 38C P0 55W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 7 NVIDIA A100 80GB PCIe On | 00000000:A4:00.0 Off | 0 |
| N/A 39C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
Use GPU 0,1,2,3,4,5,6,7
PYTHON_EXECUTABLE=/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python3
Wait 5 seconds ...
Starting workers
@@ -2,9 +2,13 @@ import torch
 from transformers import AutoTokenizer, AutoConfig
 from vllm import LLM, SamplingParams
 import re
+import os
 import json
 from envs.MATH import extract_answer, extract_groundtruth, judge_correct
 import random
+import multiprocessing
+lock = multiprocessing.Lock()
+print(id(lock), os.getpid())
 question_item_map = {}
 with open("./envs/MATH/dataset/test500.jsonl", encoding="utf-8") as f:
@@ -49,18 +53,29 @@ def _math_shepherd_infer_fn(input_str: str, model, tokenizer, device):
 @torch.inference_mode()
 def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
     STEP_TAG = '\n\n\n\n\n'
+    # print("-----------------------------")
+    # print(input_str)
-    pattern = r'<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
+    pattern = '<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
     match = re.search(pattern, input_str, re.DOTALL)
     assert match is not None, f"No match found for pattern: {pattern}"
     question = match.group(1)
     solution = question_item_map[question]["solution"]
     answer = question_item_map[question]["answer"]
+    input_str = input_str.replace('<\|im_start\|>user\n', '').replace('<\|im_end\|>', '')
+    # print("-----------------------------")
+    # print(input_str)
     steps = input_str.split(STEP_TAG)
     inputs = [steps[0]]
-    for i in range(len(steps)-1):
-        inputs.append(steps[i+1] + inputs[i])
+    for i in range(len(steps)-2):
+        inputs.append(inputs[i] + steps[i+1])
+    # print("-----------------------------")
+    # print("steps:", steps)
+    # print("-----------------------------")
+    # print("inputs:", inputs)
+    # print("-----------------------------")
     inference_params = {
         "temperature": 0.6,
@@ -71,6 +86,8 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
     }
     sampling_params = SamplingParams(**inference_params)
+    global lock
+    with lock:
-    outputs = model.generate(inputs, sampling_params)
+        outputs = model.generate(inputs, sampling_params)
     step_scores = []
...
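To make the prefix-construction change above concrete, here is a small self-contained sketch of the new behaviour together with the lock-guarded generation call. The toy prompt and the one-lock-per-worker-process reading are illustrative assumptions, not code taken from the repository.

```python
import multiprocessing

STEP_TAG = '\n\n\n\n\n'
lock = multiprocessing.Lock()  # one lock per worker process, as in the patch

# Toy prompt: a question followed by two completed steps, each terminated by STEP_TAG.
input_str = "Q: compute 2+2.\nStep 1: add." + STEP_TAG + "Step 2: the sum is 4." + STEP_TAG

steps = input_str.split(STEP_TAG)            # trailing tag leaves an empty last element
inputs = [steps[0]]
for i in range(len(steps) - 2):              # skip that trailing empty element
    inputs.append(inputs[i] + steps[i + 1])

# inputs[0] = question + step 1, inputs[1] = question + steps 1-2: cumulative prefixes,
# unlike the old loop, which glued later steps in front of earlier ones.
assert inputs == [steps[0], steps[0] + steps[1]]

# The generate call is then serialized across requests sharing the same vLLM engine:
# with lock:
#     outputs = model.generate(inputs, sampling_params)
```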
@@ -679,7 +679,7 @@ class SearchTree:
                 prms = reward_fn(
                     [
                         (
-                            simulate_env.question,
+                            f'<|im_start|>user\n{simulate_env.question}<|im_end|>',
                             simulate_env.answer + x["action"],
                         )
                         for x in simulate_env.legal_actions
...
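A brief sketch of what this SearchTree change means for the reward-model input: the question is now wrapped in Qwen chat markers, so the gold-reward worker's regex can recover it from the request string. The example question below is made up.

```python
import re

question = "What is 2 + 2?"                             # hypothetical question
wrapped = f'<|im_start|>user\n{question}<|im_end|>'      # new first tuple element

# The same pattern the gold-reward worker uses to pull the question back out.
pattern = r'<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
match = re.search(pattern, wrapped, re.DOTALL)
assert match is not None and match.group(1) == question
```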
@@ -13,9 +13,9 @@ CUDA_DEVICE_BASE=0
 POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
 # VALUE_MODEL_NAME=qwen_prm/checkpoint-6898/
 # VALUE_MODEL_NAME=Qwen/Qwen2.5-Math-7B-PRM
-VALUE_MODEL_NAME=$POLICY_MODEL_NAME
+VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
 MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
-VALUE_MODEL_PATH=$MODEL_BASE/$VALUE_MODEL_NAME
+VALUE_MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
 LOGDIR=logs_fastchat
@@ -24,8 +24,8 @@ tmux new-session -s FastChat -n controller -d
 tmux send-keys "export LOGDIR=${LOGDIR}" Enter
 tmux send-keys "$PYTHON_EXECUTABLE -m fastchat.serve.controller --port ${CONTROLER_PORT} --host $HOST_ADDR" Enter
-NUM_LM_WORKER=2
-NUM_RM_WORKER=6
+NUM_LM_WORKER=1
+NUM_RM_WORKER=7
 echo "Wait 5 seconds ..."
 sleep 5
@@ -46,5 +46,5 @@ do
   WORKER_PORT=$((i+WORKER_BASE_PORT+NUM_LM_WORKER))
   tmux new-window -n value_worker
   tmux send-keys "export LOGDIR=${LOGDIR}" Enter
-  tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+NUM_LM_WORKER+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --acc $ACC" Enter
+  tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+NUM_LM_WORKER+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --model-names $VALUE_MODEL_NAME --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --acc $ACC" Enter
 done
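For orientation, a tiny sketch of the GPU/port layout implied by the new worker counts (1 policy worker plus 7 gold-reward workers on an 8-GPU node). `CUDA_DEVICE_BASE=0` appears in the hunk header; `WORKER_BASE_PORT` is set earlier in the script and is assumed to be 8080 here purely for illustration.

```python
# Assumed values: CUDA_DEVICE_BASE=0 comes from the hunk header; WORKER_BASE_PORT=8080 is a guess.
NUM_LM_WORKER, NUM_RM_WORKER = 1, 7
CUDA_DEVICE_BASE, WORKER_BASE_PORT = 0, 8080

# Mirrors the arithmetic in the script: LM workers first, then RM workers on the remaining GPUs.
lm_workers = [(CUDA_DEVICE_BASE + i, WORKER_BASE_PORT + i) for i in range(NUM_LM_WORKER)]
rm_workers = [(CUDA_DEVICE_BASE + NUM_LM_WORKER + i, WORKER_BASE_PORT + NUM_LM_WORKER + i)
              for i in range(NUM_RM_WORKER)]

print("LM workers (gpu, port):", lm_workers)   # [(0, 8080)]
print("RM workers (gpu, port):", rm_workers)   # [(1, 8081), ..., (7, 8087)]
```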
@@ -111,8 +111,8 @@ class ModelWorker(BaseModelWorker):
         self.device = device
         if self.tokenizer.pad_token == None:
             self.tokenizer.pad_token = self.tokenizer.eos_token
-        self.context_len = get_context_length(self.model.config)
-        self.generate_stream_func = get_generate_stream_function(self.model, model_path)
+        # self.context_len = get_context_length(self.model.config)
+        # self.generate_stream_func = get_generate_stream_function(self.model, model_path)
         self.stream_interval = stream_interval
         self.embed_in_truncate = embed_in_truncate
         self.seed = seed
@@ -137,13 +137,13 @@ class ModelWorker(BaseModelWorker):
             # )
             # value = value_2
-            value = [self.infer_fn(s).tolist() for s in input_str]
+            value = [self.infer_fn(s) for s in input_str]
             # # verify two values
             # for v1, v2 in zip(value, value_2):
             #     assert torch.allclose(
             #         torch.tensor(v1), torch.tensor(v2), 1e-6), [v1, v2]
         else:
-            value = self.infer_fn(input_str).tolist()
+            value = self.infer_fn(input_str)
         ret = {"input": input_str, "value": value}
         gc.collect()
         torch.cuda.empty_cache()
@@ -206,6 +206,9 @@ def create_model_worker():
     args = parser.parse_args()
     logger.info(f"args: {args}")
+    if args.dtype is None:
+        args.dtype = "float16"
     if args.gpus:
         if len(args.gpus.split(",")) < args.num_gpus:
             raise ValueError(
...
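A minimal illustration of why the `.tolist()` calls were dropped: with the vLLM-based gold-reward path, `infer_fn` already returns a plain Python list of step scores (an assumption based on the change above), so calling `.tolist()` on the result would raise `AttributeError`. The stub below stands in for the real `infer_fn`.

```python
# Stand-in for the worker's infer_fn: the real one runs vLLM and returns a list of
# per-step scores rather than a torch.Tensor (hypothetical values shown here).
def infer_fn(prefix):
    return [0.5, 0.8]

input_str = ["prefix one", "prefix two"]          # batched request
value = [infer_fn(s) for s in input_str]          # no .tolist(): already plain lists
ret = {"input": input_str, "value": value}
print(ret)
```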
 python reason/evaluation/evaluate.py \
     --LM Qwen2.5-Math-1.5B-Instruct \
-    --RM checkpoint-6898 \
+    --RM Qwen2.5-Math-1.5B-Instruct_RM \
     --task_name MATH \
     --temperature 0.7 \
     --max_new_tokens 2048 \
...
![alt text](image.png)
\ No newline at end of file