Commit 69ea6d70 by ZhangXiaoyun

speed up

parent 9bc550dc
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
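The record files above all share the same run configuration (beam search on MATH with Qwen2.5-Math-1.5B-Instruct as both policy and reward model). A minimal sketch of loading and inspecting one such record; "record.json" is an assumed file name, since the diff does not show the actual paths:

# Minimal sketch: load one of the run-record JSON files shown above.
# "record.json" is an assumed file name, not taken from the diff.
import json

with open("record.json", encoding="utf-8") as f:
    record = json.load(f)

print(record["method"])                            # beam_search
print(record["method_config"]["tree_max_width"])   # 4
print(record["gen_config"]["max_new_tokens"])      # 2048
print(record["LM"], record["RM"])                  # policy and reward model names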
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out # Write the standard output to 'inference.out'
#SBATCH -e inference.err # Write the standard error to 'inference.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw-80g # Submit to the 'r8nv-gpu-hw-80g' partition
#SBATCH -t 1-06:00:00 # Run for a maximum time of 1 day, 6 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request 1 node
#SBATCH --gres=gpu:8 # Request 8 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="A100_80G"
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### If no constraint is specified, any available node that meets the requirements will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Job step
# sleep 30h
cd /nfs_global/S/zhangxiaoyun/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
sleep 100s
bash scripts/eval/beam_search.sh
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 4) slurm-tools/v1.0 7) cuda-cudnn/11.8-8.8.1
2) git/2.31.1 5) cmake/3.21.7
3) python3/3.8.16 6) mpich/3.2.1
Job start at 2025-02-27 13:10:47
Job run at:
Static hostname: localhost.localdomain
Transient hostname: r8a100-d01
Icon name: computer-server
Chassis: server
Machine ID: af6fe29e1ea7413c9518073fffae5e4a
Boot ID: 41d3b695cf27447cb7da3a3bfb840cb5
Operating System: Rocky Linux 8.7 (Green Obsidian)
CPE OS Name: cpe:/o:rocky:rocky:8:GA
Kernel: Linux 4.18.0-425.10.1.el8_7.x86_64
Architecture: x86-64
Have already added /tools/cluster-modulefiles into $MODULEPATH
/usr/bin/gcc
/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python
/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python3
############### /home : /home/S/zhangxiaoyun
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/home 5198M 16384M 20480M 115k 0 0
############### /workspace
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/workspace 78781M 400G 500G 719k 0 0
############### /nfs_global
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/nfs_global 1594G 5120G 7168G 39469 5000k 10000k
############### /lustre
Disk quotas for usr zhangxiaoyun (uid 6191):
Filesystem used quota limit grace files quota limit grace
/lustre 0k 8T 10T - 0 3000000 36000000 -
uid 6191 is using default block quota setting
uid 6191 is using default file quota setting
Thu Feb 27 13:10:48 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A100 80GB PCIe On | 00000000:35:00.0 Off | 0 |
| N/A 37C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 1 NVIDIA A100 80GB PCIe On | 00000000:36:00.0 Off | 0 |
| N/A 40C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 2 NVIDIA A100 80GB PCIe On | 00000000:39:00.0 Off | 0 |
| N/A 42C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 3 NVIDIA A100 80GB PCIe On | 00000000:3D:00.0 Off | 0 |
| N/A 37C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 4 NVIDIA A100 80GB PCIe On | 00000000:9C:00.0 Off | 0 |
| N/A 37C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 5 NVIDIA A100 80GB PCIe On | 00000000:9D:00.0 Off | 0 |
| N/A 40C P0 58W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 6 NVIDIA A100 80GB PCIe On | 00000000:A0:00.0 Off | 0 |
| N/A 38C P0 55W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 7 NVIDIA A100 80GB PCIe On | 00000000:A4:00.0 Off | 0 |
| N/A 39C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
Use GPU 0,1,2,3,4,5,6,7
PYTHON_EXECUTABLE=/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python3
Wait 5 seconds ...
Starting workers
......
@@ -2,9 +2,13 @@ import torch
from transformers import AutoTokenizer, AutoConfig
from vllm import LLM, SamplingParams
import re
import os
import json
from envs.MATH import extract_answer, extract_groundtruth, judge_correct
import random
import multiprocessing
lock = multiprocessing.Lock()
print(id(lock), os.getpid())
question_item_map = {}
with open("./envs/MATH/dataset/test500.jsonl", encoding="utf-8") as f:
......
@@ -49,18 +53,29 @@ def _math_shepherd_infer_fn(input_str: str, model, tokenizer, device):
@torch.inference_mode()
def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
STEP_TAG = '\n\n\n\n\n'
# print("-----------------------------")
# print(input_str)
pattern = r'<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
pattern = '<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
match = re.search(pattern, input_str, re.DOTALL)
assert match is not None, f"No match found for pattern: {pattern}"
question = match.group(1)
solution = question_item_map[question]["solution"]
answer = question_item_map[question]["answer"]
input_str = input_str.replace('<\|im_start\|>user\n','').replace('<\|im_end\|>','')
# print("-----------------------------")
# print(input_str)
steps = input_str.split(STEP_TAG)
inputs = [steps[0]]
for i in range(len(steps)-1):
    inputs.append(steps[i+1] + inputs[i])
for i in range(len(steps)-2):
    inputs.append(inputs[i] + steps[i+1])
# print("-----------------------------")
# print("steps:", steps)
# print("-----------------------------")
# print("inputs:", inputs)
# print("-----------------------------")
inference_params = {
"temperature": 0.6,
......
@@ -71,6 +86,8 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
}
sampling_params = SamplingParams(**inference_params)
global lock
with lock:
    outputs = model.generate(inputs, sampling_params)
step_scores = []
......
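The hunk above builds cumulative step prefixes before scoring and serializes the vLLM call behind a multiprocessing lock. Below is a standalone sketch of that prefix construction, following the `inputs[i] + steps[i+1]` variant shown in the diff; STEP_TAG and the lock usage are copied from the hunk, and the model call itself is only indicated in a comment:

# Sketch of the step-prefix construction from _qwen_math_gold_infer_fn above.
# Standalone reproduction for illustration; it mirrors the diff, not a tested refactor.
import multiprocessing

STEP_TAG = '\n\n\n\n\n'
lock = multiprocessing.Lock()  # one lock per worker process, as in the hunk

def build_step_prefixes(input_str: str) -> list[str]:
    # steps[0] is the prompt; each later entry appends one more solution step.
    steps = input_str.split(STEP_TAG)
    inputs = [steps[0]]
    for i in range(len(steps) - 2):
        inputs.append(inputs[i] + steps[i + 1])
    return inputs

# The vLLM generate call is then guarded so concurrent requests in the same
# worker do not interleave:
#     with lock:
#         outputs = model.generate(inputs, sampling_params)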
......
@@ -679,7 +679,7 @@ class SearchTree:
prms = reward_fn(
    [
        (
            simulate_env.question,
            f'<|im_start|>user\n{simulate_env.question}<|im_end|>',
            simulate_env.answer + x["action"],
        )
        for x in simulate_env.legal_actions
......
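The SearchTree change above wraps the raw question in a Qwen chat user turn before it reaches the reward function. An illustrative helper showing that formatting; the function name is invented for this sketch, while the field names follow the diff:

# Illustrative helper mirroring the SearchTree change above: wrap the question in a
# Qwen-style <|im_start|>user ... <|im_end|> turn before scoring candidate actions.
def build_reward_inputs(question: str, answer: str, legal_actions: list[dict]) -> list[tuple[str, str]]:
    prompt = f'<|im_start|>user\n{question}<|im_end|>'
    return [(prompt, answer + action["action"]) for action in legal_actions]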
......
@@ -13,9 +13,9 @@ CUDA_DEVICE_BASE=0
POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
# VALUE_MODEL_NAME=qwen_prm/checkpoint-6898/
# VALUE_MODEL_NAME=Qwen/Qwen2.5-Math-7B-PRM
VALUE_MODEL_NAME=$POLICY_MODEL_NAME
VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
VALUE_MODEL_PATH=$MODEL_BASE/$VALUE_MODEL_NAME
VALUE_MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
LOGDIR=logs_fastchat
......
@@ -24,8 +24,8 @@ tmux new-session -s FastChat -n controller -d
tmux send-keys "export LOGDIR=${LOGDIR}" Enter
tmux send-keys "$PYTHON_EXECUTABLE -m fastchat.serve.controller --port ${CONTROLER_PORT} --host $HOST_ADDR" Enter
NUM_LM_WORKER=2
NUM_RM_WORKER=6
NUM_LM_WORKER=1
NUM_RM_WORKER=7
echo "Wait 5 seconds ..."
sleep 5
......
@@ -46,5 +46,5 @@ do
WORKER_PORT=$((i+WORKER_BASE_PORT+NUM_LM_WORKER))
tmux new-window -n value_worker
tmux send-keys "export LOGDIR=${LOGDIR}" Enter
tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+NUM_LM_WORKER+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --acc $ACC" Enter
tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+NUM_LM_WORKER+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --model-names $VALUE_MODEL_NAME --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --acc $ACC" Enter
done
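With NUM_LM_WORKER=1 and NUM_RM_WORKER=7, the loop above pins each reward worker to its own GPU and port after the policy worker. A hypothetical reconstruction of the resulting layout; WORKER_BASE_PORT is an assumed value, not shown in the hunk:

# Hypothetical sketch of the GPU/port assignment implied by the service script above.
# WORKER_BASE_PORT is an assumption; CUDA_DEVICE_BASE and the worker counts come from the diff.
NUM_LM_WORKER = 1
NUM_RM_WORKER = 7
CUDA_DEVICE_BASE = 0
WORKER_BASE_PORT = 30010  # assumed base port

for i in range(NUM_RM_WORKER):
    gpu = i + NUM_LM_WORKER + CUDA_DEVICE_BASE    # reward workers take GPUs 1..7
    port = i + WORKER_BASE_PORT + NUM_LM_WORKER   # ports offset past the policy worker
    print(f"value_worker {i}: CUDA_VISIBLE_DEVICES={gpu}, port={port}")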
......
@@ -111,8 +111,8 @@ class ModelWorker(BaseModelWorker):
self.device = device
if self.tokenizer.pad_token == None:
    self.tokenizer.pad_token = self.tokenizer.eos_token
self.context_len = get_context_length(self.model.config)
self.generate_stream_func = get_generate_stream_function(self.model, model_path)
# self.context_len = get_context_length(self.model.config)
# self.generate_stream_func = get_generate_stream_function(self.model, model_path)
self.stream_interval = stream_interval
self.embed_in_truncate = embed_in_truncate
self.seed = seed
......
@@ -137,13 +137,13 @@ class ModelWorker(BaseModelWorker):
# )
# value = value_2
value = [self.infer_fn(s).tolist() for s in input_str]
value = [self.infer_fn(s) for s in input_str]
# # verify two values
# for v1, v2 in zip(value, value_2):
# assert torch.allclose(
# torch.tensor(v1), torch.tensor(v2), 1e-6), [v1, v2]
else:
    value = self.infer_fn(input_str).tolist()
    value = self.infer_fn(input_str)
ret = {"input": input_str, "value": value}
gc.collect()
torch.cuda.empty_cache()
......
@@ -206,6 +206,9 @@ def create_model_worker():
args = parser.parse_args()
logger.info(f"args: {args}")
if args.dtype is None:
    args.dtype = "float16"
if args.gpus:
    if len(args.gpus.split(",")) < args.num_gpus:
        raise ValueError(
......
python reason/evaluation/evaluate.py \
--LM Qwen2.5-Math-1.5B-Instruct \
--RM checkpoint-6898 \
--RM Qwen2.5-Math-1.5B-Instruct_RM \
--task_name MATH \
--temperature 0.7 \
--max_new_tokens 2048 \
......
![alt text](image.png)
\ No newline at end of file