[misc] feat: enable grad ckpt as default and enable chunk prefill as default (#147)

9fca71d2 · Guangming Sheng · GitHub · 54603cbd · 9fca71d2 · 9fca71d2
Unverified commit 9fca71d2, authored Jan 28, 2025 by Guangming Sheng; committed by GitHub on Jan 28, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 6 additions and 5 deletions

examples/ppo_trainer/run_qwen2-7b_rm.sh
+3 -2

verl/trainer/config/ppo_trainer.yaml
+3 -3

No files found.
--- a/examples/ppo_trainer/run_qwen2-7b_rm.sh
+++ b/examples/ppo_trainer/run_qwen2-7b_rm.sh
@@ -24,6 +24,7 @@ python3 -m verl.trainer.main_ppo \
    actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
    actor_rollout_ref.actor.ppo_mini_batch_size=256 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
+    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.grad_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
@@ -37,8 +38,8 @@ python3 -m verl.trainer.main_ppo \
    critic.model.use_remove_padding=True \
    critic.optim.lr_warmup_steps_ratio=0.05 \
    critic.model.path=Qwen/Qwen2-7B-Instruct \
-    critic.model.enable_gradient_checkpointing=False \
+    critic.model.enable_gradient_checkpointing=True \
-    critic.ppo_micro_batch_size_per_gpu=16 \
+    critic.ppo_micro_batch_size_per_gpu=32 \
    critic.model.fsdp_config.param_offload=False \
    critic.model.fsdp_config.grad_offload=False \
    critic.model.fsdp_config.optimizer_offload=False \

--- a/verl/trainer/config/ppo_trainer.yaml
+++ b/verl/trainer/config/ppo_trainer.yaml
@@ -16,7 +16,7 @@ actor_rollout_ref:
    path: ~/models/deepseek-llm-7b-chat
    external_lib: null
    override_config: { }
-    enable_gradient_checkpointing: False
+    enable_gradient_checkpointing: True
    use_remove_padding: False
  actor:
    strategy: fsdp  # This is for backward-compatibility
@@ -81,7 +81,7 @@ actor_rollout_ref:
    log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
    log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
    disable_log_stats: True
-    enable_chunked_prefill: False # could get higher throughput
+    enable_chunked_prefill: True # could get higher throughput
    # for hf rollout
    do_sample: True
    # number of responses (i.e. num sample times)
@@ -100,7 +100,7 @@ critic:
    tokenizer_path: ${actor_rollout_ref.model.path}
    override_config: { }
    external_lib: ${actor_rollout_ref.model.external_lib}
-    enable_gradient_checkpointing: False
+    enable_gradient_checkpointing: True
    use_remove_padding: False
    fsdp_config:
      param_offload: False