Unverified Commit a65c9157 by Guangming Sheng Committed by GitHub

[misc] fix: GRPO KL loss should be added (not subtracted) when minimizing the policy loss (#179)

- As titled
parent 38ac5255
......@@ -263,7 +263,7 @@ class DataParallelPPOActor(BasePPOActor):
kl_penalty=self.config.kl_loss_type)
kl_loss = masked_mean(kld, response_mask)
- policy_loss = policy_loss - kl_loss * self.config.kl_loss_coef
+ policy_loss = policy_loss + kl_loss * self.config.kl_loss_coef
metrics['actor/kl_loss'] = kl_loss.detach().item()
metrics['actor/kl_coef'] = self.config.kl_loss_coef
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment