Commit fa5864b0 by ZhangXiaoyun

assert

parent f9793a40
...@@ -80,9 +80,9 @@ ulimit -u 4125556 ...@@ -80,9 +80,9 @@ ulimit -u 4125556
cd /nfs_global/S/zhangxiaoyun/prm/openr cd /nfs_global/S/zhangxiaoyun/prm/openr
export PYTHONPATH=$(pwd) export PYTHONPATH=$(pwd)
bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
sleep 30s sleep 30s
bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-1.5B-Instruct bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
#- End #- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")" echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
...@@ -81,8 +81,10 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc): ...@@ -81,8 +81,10 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
# print(input_str) # print(input_str)
steps = input_str.split(STEP_TAG) steps = input_str.split(STEP_TAG)
inputs = [steps[0]] inputs = [steps[0]]
for i in range(len(steps)-2): for i in range(1, len(steps)):
inputs.append(inputs[i] + steps[i+1]) if not steps[i].isspace():
inputs.append(inputs[1] + steps[i])
steps_num = len(inputs)
# print("-----------------------------") # print("-----------------------------")
# print("steps:", steps) # print("steps:", steps)
# print("-----------------------------") # print("-----------------------------")
...@@ -117,6 +119,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc): ...@@ -117,6 +119,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
# 如果前序是0,那就不用再rollout了 # 如果前序是0,那就不用再rollout了
if len(step_scores) != 0 and step_scores[-1] == 0: if len(step_scores) != 0 and step_scores[-1] == 0:
step_scores.extend([0] * (len(inputs) - len(step_scores))) step_scores.extend([0] * (len(inputs) - len(step_scores)))
assert steps_num == len(step_scores), f"{steps_num} != {len(step_scores)}"
return step_scores return step_scores
...@@ -143,6 +146,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc): ...@@ -143,6 +146,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
step_score = 1 - step_score step_score = 1 - step_score
step_scores.append(step_score) step_scores.append(step_score)
assert steps_num == len(step_scores), f"{steps_num} != {len(step_scores)}"
return step_scores return step_scores
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment