Commit 8d63ec21 by 苏舞仙

pad

parent e5f061dc
......@@ -1009,7 +1009,7 @@ class RayPPOTrainer(object):
new_batch = new_batch.union(gen_batch_output)
# unpad
-new_batch = unpad_dataproto(new_batch, pad_size=pad_size)
+new_batch = unpad_dataproto(new_batch, pad_size=pad_size * self.config.actor_rollout_ref.rollout.n)
with _timer('reward', timing_raw):
# compute scores. Support both model and function-based.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment