assert

fa5864b0 · ZhangXiaoyun · f9793a40 · fa5864b0 · fa5864b0
Commit fa5864b0 authored Mar 01, 2025 by ZhangXiaoyun
Show whitespace changes
Inline Side-by-side

Showing with 8 additions and 4 deletions

openr/infer.sh
+2 -2

openr/prm/infer_fns.py
+6 -2

No files found.
--- a/openr/infer.sh
+++ b/openr/infer.sh
@@ -80,9 +80,9 @@ ulimit -u 4125556
 cd /nfs_global/S/zhangxiaoyun/prm/openr
 export PYTHONPATH=$(pwd)

-bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct
+bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
 sleep 30s
-bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct
+bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct

 #- End
 echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
--- a/openr/prm/infer_fns.py
+++ b/openr/prm/infer_fns.py
@@ -81,8 +81,10 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
    # print(input_str)
    steps = input_str.split(STEP_TAG)
    inputs = [steps[0]]
-    for i in range(len(steps)-2):
-        inputs.append(inputs[i] + steps[i+1])
+    for i in range(1, len(steps)):
+        if not steps[i].isspace():
+            inputs.append(inputs[-1] + steps[i])
+    steps_num = len(inputs)
    # print("-----------------------------")
    # print("steps:", steps)
    # print("-----------------------------")
@@ -117,6 +119,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
    # 如果前序是0，那就不用再rollout了
    if len(step_scores) != 0 and step_scores[-1] == 0:
        step_scores.extend([0] * (len(inputs) - len(step_scores)))
+        assert steps_num == len(step_scores), f"{steps_num} != {len(step_scores)}"
        return step_scores
        

@@ -143,6 +146,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
            step_score = 1 - step_score
        step_scores.append(step_score)

+    assert steps_num == len(step_scores), f"{steps_num} != {len(step_scores)}"
    return step_scores