Unverified Commit 9fca71d2 by Guangming Sheng Committed by GitHub

[misc] feat: enable grad ckpt as default and enable chunk prefill as default (#147)

parent 54603cbd
...@@ -24,6 +24,7 @@ python3 -m verl.trainer.main_ppo \ ...@@ -24,6 +24,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \ actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
actor_rollout_ref.actor.ppo_mini_batch_size=256 \ actor_rollout_ref.actor.ppo_mini_batch_size=256 \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
actor_rollout_ref.model.enable_gradient_checkpointing=True \
actor_rollout_ref.actor.fsdp_config.param_offload=False \ actor_rollout_ref.actor.fsdp_config.param_offload=False \
actor_rollout_ref.actor.fsdp_config.grad_offload=False \ actor_rollout_ref.actor.fsdp_config.grad_offload=False \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
...@@ -37,8 +38,8 @@ python3 -m verl.trainer.main_ppo \ ...@@ -37,8 +38,8 @@ python3 -m verl.trainer.main_ppo \
critic.model.use_remove_padding=True \ critic.model.use_remove_padding=True \
critic.optim.lr_warmup_steps_ratio=0.05 \ critic.optim.lr_warmup_steps_ratio=0.05 \
critic.model.path=Qwen/Qwen2-7B-Instruct \ critic.model.path=Qwen/Qwen2-7B-Instruct \
critic.model.enable_gradient_checkpointing=False \ critic.model.enable_gradient_checkpointing=True \
critic.ppo_micro_batch_size_per_gpu=16 \ critic.ppo_micro_batch_size_per_gpu=32 \
critic.model.fsdp_config.param_offload=False \ critic.model.fsdp_config.param_offload=False \
critic.model.fsdp_config.grad_offload=False \ critic.model.fsdp_config.grad_offload=False \
critic.model.fsdp_config.optimizer_offload=False \ critic.model.fsdp_config.optimizer_offload=False \
......
...@@ -16,7 +16,7 @@ actor_rollout_ref: ...@@ -16,7 +16,7 @@ actor_rollout_ref:
path: ~/models/deepseek-llm-7b-chat path: ~/models/deepseek-llm-7b-chat
external_lib: null external_lib: null
override_config: { } override_config: { }
enable_gradient_checkpointing: False enable_gradient_checkpointing: True
use_remove_padding: False use_remove_padding: False
actor: actor:
strategy: fsdp # This is for backward-compatibility strategy: fsdp # This is for backward-compatibility
...@@ -81,7 +81,7 @@ actor_rollout_ref: ...@@ -81,7 +81,7 @@ actor_rollout_ref:
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz} log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu} log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
disable_log_stats: True disable_log_stats: True
enable_chunked_prefill: False # could get higher throughput enable_chunked_prefill: True # could get higher throughput
# for hf rollout # for hf rollout
do_sample: True do_sample: True
# number of responses (i.e. num sample times) # number of responses (i.e. num sample times)
...@@ -100,7 +100,7 @@ critic: ...@@ -100,7 +100,7 @@ critic:
tokenizer_path: ${actor_rollout_ref.model.path} tokenizer_path: ${actor_rollout_ref.model.path}
override_config: { } override_config: { }
external_lib: ${actor_rollout_ref.model.external_lib} external_lib: ${actor_rollout_ref.model.external_lib}
enable_gradient_checkpointing: False enable_gradient_checkpointing: True
use_remove_padding: False use_remove_padding: False
fsdp_config: fsdp_config:
param_offload: False param_offload: False
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment