redis

1cbcbfc2 · ZhangXiaoyun · 708b9eee · 1cbcbfc2 · 1cbcbfc2 · 1cbcbfc2
Commit 1cbcbfc2 authored Mar 01, 2025 by ZhangXiaoyun
Showing 4 changed files with 25 additions and 10 deletions

openr/infer.sh
+3 -3

openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh
+9 -7

openr/reason/llm_service/workers/gold_reward_model_worker.py
+9 -0

openr/scripts/eval/beam_search.sh
+4 -0

No files found.
--- a/openr/infer.sh
+++ b/openr/infer.sh
@@ -80,9 +80,9 @@ ulimit -u 4125556
 cd /nfs_global/S/zhangxiaoyun/prm/openr
 export PYTHONPATH=$(pwd)

-bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
-sleep 100s
-bash scripts/eval/beam_search.sh
+bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct
+sleep 30s
+bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct

 #- End
 echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
--- a/openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh
+++ b/openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh
@@ -21,10 +21,14 @@ while [[ "$#" -gt 0 ]]; do
    shift
 done

+
 VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
 MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
 VALUE_MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME

+echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME"
+echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME"
+
 echo PYTHON_EXECUTABLE=$(which python3)
 PYTHON_EXECUTABLE=$(which python3)

@@ -37,8 +41,8 @@ tmux new-session -s FastChat -n controller -d
 tmux send-keys "export LOGDIR=${LOGDIR}" Enter
 tmux send-keys "$PYTHON_EXECUTABLE -m fastchat.serve.controller --port ${CONTROLER_PORT} --host $HOST_ADDR" Enter

-NUM_LM_WORKER=2
-NUM_RM_WORKER=14
+NUM_LM_WORKER=1
+NUM_RM_WORKER=7

 echo "Wait 5 seconds ..."
 sleep 5
@@ -49,7 +53,7 @@ do
  WORKER_PORT=$((WORKER_BASE_PORT+i))
  tmux new-window -n policy_worker_$i
  tmux send-keys "export LOGDIR=${LOGDIR}" Enter
-  tmux send-keys "CUDA_VISIBLE_DEVICES=$((CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.vllm_worker --model-path $MODEL_PATH --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.45" Enter
+  tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.vllm_worker --model-path $MODEL_PATH --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.95" Enter
 done


@@ -57,11 +61,9 @@ done
 for i in $(seq 0 $((NUM_RM_WORKER-1)))
 do
  WORKER_PORT=$((i+WORKER_BASE_PORT+NUM_LM_WORKER))
-  GPU_ID=$(((i + NUM_LM_WORKER) / 2 + CUDA_DEVICE_BASE))
-
-  tmux new-window -n value_worker
+  tmux new-window -n value_worker_$i
  tmux send-keys "export LOGDIR=${LOGDIR}" Enter
-  tmux send-keys "CUDA_VISIBLE_DEVICES=$GPU_ID $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --model-names $VALUE_MODEL_NAME --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.45 --acc $ACC" Enter
+  tmux send-keys "CUDA_VISIBLE_DEVICES=$((i+NUM_LM_WORKER+CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --model-names $VALUE_MODEL_NAME --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.95 --acc $ACC" Enter
 done

 tmux send-keys "redis-server ~/redis_conf/redis.conf" Enter
--- a/openr/reason/llm_service/workers/gold_reward_model_worker.py
+++ b/openr/reason/llm_service/workers/gold_reward_model_worker.py
@@ -74,6 +74,7 @@ class ModelWorker(BaseModelWorker):
        embed_in_truncate: bool = False,
        seed: Optional[int] = None,
        debug: bool = False,
+        gpu_memory_utilization: float = 0.45,
        **kwargs,
    ):
        super().__init__(
@@ -92,6 +93,7 @@ class ModelWorker(BaseModelWorker):
            trust_remote_code=True,
            tensor_parallel_size=num_gpus,
            dtype=dtype,
+            gpu_memory_utilization=gpu_memory_utilization,
        )
        self.tokenizer = self.model.get_tokenizer()
        # self.model, self.tokenizer = load_model(
@@ -202,6 +204,12 @@ def create_model_worker():
        type=float,
        default=1.0,
        help="acc for prm",
+    ) 
+    parser.add_argument(
+        "--gpu_memory_utilization",
+        type=float,
+        default=0.45,
+        help="gpu memory utilization",
    )
    args = parser.parse_args()
    logger.info(f"args: {args}")
@@ -270,6 +278,7 @@ def create_model_worker():
        embed_in_truncate=args.embed_in_truncate,
        seed=args.seed,
        debug=args.debug,
+        gpu_memory_utilization=args.gpu_memory_utilization,
    )
    return args, worker


--- a/openr/scripts/eval/beam_search.sh
+++ b/openr/scripts/eval/beam_search.sh
@@ -13,9 +13,13 @@ while [[ "$#" -gt 0 ]]; do
    shift
 done

+
 VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
 SAVE_DIR="${POLICY_MODEL_NAME}/${ACC}"

+echo "POLICY_MODEL_NAME is $POLICY_MODEL_NAME"
+echo "VALUE_MODEL_NAME is $VALUE_MODEL_NAME"
+
 python reason/evaluation/evaluate.py \
    --LM $POLICY_MODEL_NAME \
    --RM $VALUE_MODEL_NAME \