Commit 69ea6d70 by ZhangXiaoyun

speed up!

parent 9bc550dc
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
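For reference, a minimal sketch of reading one of these run-record files back in Python. The actual file paths are not shown in this diff, so `record.json` below is only a placeholder name.

```python
import json

# Hedged sketch: the real record files live under the evaluation output
# directories; "record.json" is a placeholder, not a path from the repo.
with open("record.json", encoding="utf-8") as f:
    record = json.load(f)

gen_config = record["gen_config"]        # n=1, temperature=0.7, max_new_tokens=2048, ...
method_config = record["method_config"]  # beam_search on MATH, beam_size=1, tree_max_width=4
print(record["LM"], record["RM"])        # Qwen2.5-Math-1.5B-Instruct / Qwen2.5-Math-1.5B-Instruct_RM
```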
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
{"gen_config": {"n": 1, "temperature": 0.7, "top_p": 1, "top_k": -1, "max_new_tokens": 2048, "stop_token_ids": null, "stop_str": null, "include_stop_str_in_output": false}, "method": "beam_search", "method_config": {"task_name": "MATH", "tree_max_width": 4, "tree_max_depth": 50, "init_critic_value": true, "beam_size": 1}, "LM": "Qwen2.5-Math-1.5B-Instruct", "RM": "Qwen2.5-Math-1.5B-Instruct_RM"}
\ No newline at end of file
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J inference # The job name
#SBATCH -o inference.out           # Write the standard output to 'inference.out'
#SBATCH -e inference.err           # Write the standard error to 'inference.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw-80g         # Submit to the 'r8nv-gpu-hw-80g' partition
#SBATCH -t 1-06:00:00              # Run for a maximum time of 1 day, 6 hours, 00 mins, 00 secs
#SBATCH --nodes=1                  # Request 1 node
#SBATCH --gres=gpu:8               # Request 8 GPUs per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --constraint="A100_80G"
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
# echo "$(df -h | grep -v tmpfs)"
#- Load environments
module unload cuda-cudnn
source ~/.bashrc
module list # list modules loaded
conda activate open_reasoner
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
nvidia-smi
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
#- Job step
# sleep 30h
cd /nfs_global/S/zhangxiaoyun/prm/openr
export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
sleep 100s
bash scripts/eval/beam_search.sh
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 4) slurm-tools/v1.0 7) cuda-cudnn/11.8-8.8.1
2) git/2.31.1 5) cmake/3.21.7
3) python3/3.8.16 6) mpich/3.2.1
Job start at 2025-02-27 13:10:47
Job run at:
Static hostname: localhost.localdomain
Transient hostname: r8a100-d01
Icon name: computer-server
Chassis: server
Machine ID: af6fe29e1ea7413c9518073fffae5e4a
Boot ID: 41d3b695cf27447cb7da3a3bfb840cb5
Operating System: Rocky Linux 8.7 (Green Obsidian)
CPE OS Name: cpe:/o:rocky:rocky:8:GA
Kernel: Linux 4.18.0-425.10.1.el8_7.x86_64
Architecture: x86-64
Have already added /tools/cluster-modulefiles into $MODULEPATH
/usr/bin/gcc
/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python
/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python3
############### /home : /home/S/zhangxiaoyun
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/home 5198M 16384M 20480M 115k 0 0
############### /workspace
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/workspace 78781M 400G 500G 719k 0 0
############### /nfs_global
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem space quota limit grace files quota limit grace
/nfs_global 1594G 5120G 7168G 39469 5000k 10000k
############### /lustre
Disk quotas for user zhangxiaoyun (uid 6191):
Filesystem used quota limit grace files quota limit grace
/lustre 0k 8T 10T - 0 3000000 36000000 -
uid 6191 is using default block quota setting
uid 6191 is using default file quota setting
Thu Feb 27 13:10:48 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A100 80GB PCIe On | 00000000:35:00.0 Off | 0 |
| N/A 37C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 1 NVIDIA A100 80GB PCIe On | 00000000:36:00.0 Off | 0 |
| N/A 40C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 2 NVIDIA A100 80GB PCIe On | 00000000:39:00.0 Off | 0 |
| N/A 42C P0 57W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 3 NVIDIA A100 80GB PCIe On | 00000000:3D:00.0 Off | 0 |
| N/A 37C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 4 NVIDIA A100 80GB PCIe On | 00000000:9C:00.0 Off | 0 |
| N/A 37C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 5 NVIDIA A100 80GB PCIe On | 00000000:9D:00.0 Off | 0 |
| N/A 40C P0 58W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 6 NVIDIA A100 80GB PCIe On | 00000000:A0:00.0 Off | 0 |
| N/A 38C P0 55W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 7 NVIDIA A100 80GB PCIe On | 00000000:A4:00.0 Off | 0 |
| N/A 39C P0 56W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
Use GPU 0,1,2,3,4,5,6,7
PYTHON_EXECUTABLE=/workspace/S/zhangxiaoyun/miniconda3/envs/open_reasoner/bin/python3
Wait 5 seconds ...
Starting workers
@@ -2,9 +2,13 @@ import torch
 from transformers import AutoTokenizer, AutoConfig
 from vllm import LLM, SamplingParams
 import re
+import os
 import json
 from envs.MATH import extract_answer, extract_groundtruth, judge_correct
 import random
+import multiprocessing
+lock = multiprocessing.Lock()
+print(id(lock), os.getpid())
 question_item_map = {}
 with open("./envs/MATH/dataset/test500.jsonl", encoding="utf-8") as f:
@@ -49,18 +53,29 @@ def _math_shepherd_infer_fn(input_str: str, model, tokenizer, device):
 @torch.inference_mode()
 def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
     STEP_TAG = '\n\n\n\n\n'
+    # print("-----------------------------")
+    # print(input_str)
-    pattern = r'<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
+    pattern = '<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
     match = re.search(pattern, input_str, re.DOTALL)
     assert match is not None, f"No match found for pattern: {pattern}"
     question = match.group(1)
     solution = question_item_map[question]["solution"]
     answer = question_item_map[question]["answer"]
+    input_str = input_str.replace('<\|im_start\|>user\n', '').replace('<\|im_end\|>', '')
+    # print("-----------------------------")
+    # print(input_str)
     steps = input_str.split(STEP_TAG)
     inputs = [steps[0]]
-    for i in range(len(steps)-1):
-        inputs.append(steps[i+1] + inputs[i])
+    for i in range(len(steps)-2):
+        inputs.append(inputs[i] + steps[i+1])
+    # print("-----------------------------")
+    # print("steps:", steps)
+    # print("-----------------------------")
+    # print("inputs:", inputs)
+    # print("-----------------------------")
     inference_params = {
         "temperature": 0.6,
@@ -71,6 +86,8 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
     }
     sampling_params = SamplingParams(**inference_params)
+    global lock
+    with lock:
-    outputs = model.generate(inputs, sampling_params)
+        outputs = model.generate(inputs, sampling_params)
     step_scores = []
...
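To make the prefix-construction change above concrete, here is a small self-contained sketch of the new behaviour together with the lock-guarded generation call. The toy prompt and the one-lock-per-worker-process reading are illustrative assumptions, not code taken from the repository.

```python
import multiprocessing

STEP_TAG = '\n\n\n\n\n'
lock = multiprocessing.Lock()  # one lock per worker process, as in the patch

# Toy prompt: a question followed by two completed steps, each terminated by STEP_TAG.
input_str = "Q: compute 2+2.\nStep 1: add." + STEP_TAG + "Step 2: the sum is 4." + STEP_TAG

steps = input_str.split(STEP_TAG)            # trailing tag leaves an empty last element
inputs = [steps[0]]
for i in range(len(steps) - 2):              # skip that trailing empty element
    inputs.append(inputs[i] + steps[i + 1])

# inputs[0] = question + step 1, inputs[1] = question + steps 1-2: cumulative prefixes,
# unlike the old loop, which glued later steps in front of earlier ones.
assert inputs == [steps[0], steps[0] + steps[1]]

# The generate call is then serialized across requests sharing the same vLLM engine:
# with lock:
#     outputs = model.generate(inputs, sampling_params)
```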
@@ -679,7 +679,7 @@ class SearchTree:
                 prms = reward_fn(
                     [
                         (
-                            simulate_env.question,
+                            f'<|im_start|>user\n{simulate_env.question}<|im_end|>',
                             simulate_env.answer + x["action"],
                         )
                         for x in simulate_env.legal_actions
...
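A brief sketch of what this SearchTree change means for the reward-model input: the question is now wrapped in Qwen chat markers, so the gold-reward worker's regex can recover it from the request string. The example question below is made up.

```python
import re

question = "What is 2 + 2?"                             # hypothetical question
wrapped = f'<|im_start|>user\n{question}<|im_end|>'      # new first tuple element

# The same pattern the gold-reward worker uses to pull the question back out.
pattern = r'<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
match = re.search(pattern, wrapped, re.DOTALL)
assert match is not None and match.group(1) == question
```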
@@ -13,9 +13,9 @@ CUDA_DEVICE_BASE=0
 POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
 # VALUE_MODEL_NAME=qwen_prm/checkpoint-6898/
 # VALUE_MODEL_NAME=Qwen/Qwen2.5-Math-7B-PRM
-VALUE_MODEL_NAME=$POLICY_MODEL_NAME
+VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
 MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
-VALUE_MODEL_PATH=$MODEL_BASE/$VALUE_MODEL_NAME
+VALUE_MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
 LOGDIR=logs_fastchat
@@ -24,8 +24,8 @@ tmux new-session -s FastChat -n controller -d
 tmux send-keys "export LOGDIR=${LOGDIR}" Enter
 tmux send-keys "$PYTHON_EXECUTABLE -m fastchat.serve.controller --port ${CONTROLER_PORT} --host $HOST_ADDR" Enter
-NUM_LM_WORKER=2
-NUM_RM_WORKER=6
+NUM_LM_WORKER=1
+NUM_RM_WORKER=7
 echo "Wait 5 seconds ..."
 sleep 5
@@ -46,5 +46,5 @@ do
   WORKER_PORT=$((i+WORKER_BASE_PORT+NUM_LM_WORKER))
   tmux new-window -n value_worker
   tmux send-keys "export LOGDIR=${LOGDIR}" Enter
-  tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+NUM_LM_WORKER+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --acc $ACC" Enter
+  tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+NUM_LM_WORKER+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --model-names $VALUE_MODEL_NAME --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --acc $ACC" Enter
 done
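For orientation, a tiny sketch of the GPU/port layout implied by the new worker counts (1 policy worker plus 7 gold-reward workers on an 8-GPU node). `CUDA_DEVICE_BASE=0` appears in the hunk header; `WORKER_BASE_PORT` is set earlier in the script and is assumed to be 8080 here purely for illustration.

```python
# Assumed values: CUDA_DEVICE_BASE=0 comes from the hunk header; WORKER_BASE_PORT=8080 is a guess.
NUM_LM_WORKER, NUM_RM_WORKER = 1, 7
CUDA_DEVICE_BASE, WORKER_BASE_PORT = 0, 8080

# Mirrors the arithmetic in the script: LM workers first, then RM workers on the remaining GPUs.
lm_workers = [(CUDA_DEVICE_BASE + i, WORKER_BASE_PORT + i) for i in range(NUM_LM_WORKER)]
rm_workers = [(CUDA_DEVICE_BASE + NUM_LM_WORKER + i, WORKER_BASE_PORT + NUM_LM_WORKER + i)
              for i in range(NUM_RM_WORKER)]

print("LM workers (gpu, port):", lm_workers)   # [(0, 8080)]
print("RM workers (gpu, port):", rm_workers)   # [(1, 8081), ..., (7, 8087)]
```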
@@ -111,8 +111,8 @@ class ModelWorker(BaseModelWorker):
         self.device = device
         if self.tokenizer.pad_token == None:
             self.tokenizer.pad_token = self.tokenizer.eos_token
-        self.context_len = get_context_length(self.model.config)
-        self.generate_stream_func = get_generate_stream_function(self.model, model_path)
+        # self.context_len = get_context_length(self.model.config)
+        # self.generate_stream_func = get_generate_stream_function(self.model, model_path)
         self.stream_interval = stream_interval
         self.embed_in_truncate = embed_in_truncate
         self.seed = seed
@@ -137,13 +137,13 @@ class ModelWorker(BaseModelWorker):
             # )
             # value = value_2
-            value = [self.infer_fn(s).tolist() for s in input_str]
+            value = [self.infer_fn(s) for s in input_str]
             # # verify two values
             # for v1, v2 in zip(value, value_2):
             #     assert torch.allclose(
             #         torch.tensor(v1), torch.tensor(v2), 1e-6), [v1, v2]
         else:
-            value = self.infer_fn(input_str).tolist()
+            value = self.infer_fn(input_str)
         ret = {"input": input_str, "value": value}
         gc.collect()
         torch.cuda.empty_cache()
@@ -206,6 +206,9 @@ def create_model_worker():
     args = parser.parse_args()
     logger.info(f"args: {args}")
+    if args.dtype is None:
+        args.dtype = "float16"
     if args.gpus:
         if len(args.gpus.split(",")) < args.num_gpus:
             raise ValueError(
...
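A minimal illustration of why the `.tolist()` calls were dropped: with the vLLM-based gold-reward path, `infer_fn` already returns a plain Python list of step scores (an assumption based on the change above), so calling `.tolist()` on the result would raise `AttributeError`. The stub below stands in for the real `infer_fn`.

```python
# Stand-in for the worker's infer_fn: the real one runs vLLM and returns a list of
# per-step scores rather than a torch.Tensor (hypothetical values shown here).
def infer_fn(prefix):
    return [0.5, 0.8]

input_str = ["prefix one", "prefix two"]          # batched request
value = [infer_fn(s) for s in input_str]          # no .tolist(): already plain lists
ret = {"input": input_str, "value": value}
print(ret)
```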
 python reason/evaluation/evaluate.py \
     --LM Qwen2.5-Math-1.5B-Instruct \
-    --RM checkpoint-6898 \
+    --RM Qwen2.5-Math-1.5B-Instruct_RM \
     --task_name MATH \
     --temperature 0.7 \
     --max_new_tokens 2048 \
...
![alt text](image.png)
\ No newline at end of file