Unverified commit fefca417 by Chi Zhang, committed by GitHub

[perf] feat: support ref/rm offload (#121)

- Force the ref/rm models to use FSDP CPUOffload, and fix the root FSDP unit
  not resharding its weights after forward.
- HSDP support is on hold; that path currently hits an assert False.
parent 2f1be790
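The gist of the change, as a minimal Python sketch (not verl's exact code): wrap the forward-only ref/rm modules with FSDP CPU offload, and explicitly reshard the root FSDP unit after forward, since by default it keeps its weights unsharded in anticipation of a backward pass that never comes for these inference-only models. The helper names here are made up for illustration, and the `_handle.reshard(...)` call is a private, version-dependent PyTorch API.

```python
# A minimal sketch (assumptions, not verl's exact code) of the two fixes:
# force CPU offload for ref/rm, and reshard the root FSDP unit after forward.
import torch
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, CPUOffload

def wrap_forward_only_model(module: torch.nn.Module) -> FSDP:
    # ref/rm never take an optimizer step, so their parameters can live on
    # CPU between calls; FSDP streams each shard to GPU just in time.
    return FSDP(module, cpu_offload=CPUOffload(offload_params=True))

@torch.no_grad()
def forward_and_reshard(fsdp_module: FSDP, *args, **kwargs):
    out = fsdp_module(*args, **kwargs)
    # By default the root FSDP unit keeps its unsharded weights after forward
    # (an optimization for the upcoming backward). With no backward, that only
    # wastes GPU memory, so free the unsharded flat param explicitly. NOTE:
    # _handle is a private PyTorch attribute and varies across versions.
    if fsdp_module._handle is not None:
        fsdp_module._handle.reshard(free_unsharded_flat_param=True)
    return out
```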
@@ -43,6 +43,7 @@ actor_rollout_ref:
       param_offload: False
       grad_offload: False
       optimizer_offload: False
+      fsdp_size: -1
   ref:
     fsdp_config:
       param_offload: False
@@ -91,6 +92,7 @@ critic:
     enable_gradient_checkpointing: False
     use_remove_padding: False
     fsdp_config:
+      fsdp_size: -1
       param_offload: False
       grad_offload: False
       optimizer_offload: False
@@ -125,6 +127,7 @@ reward_model:
     use_remove_padding: False
     fsdp_config:
       min_num_params: 0
+      fsdp_size: -1
   micro_batch_size: 8
   max_length: null
   ulysses_sequence_parallel_size: 1 # sp size
...
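For context on the `fsdp_size: -1` default added above: below is a hypothetical sketch of how such a knob typically maps onto a device mesh, where `-1` means one sharding group spanning all ranks (plain FSDP) and a smaller value would imply HSDP, which this commit explicitly leaves disabled. The function is illustrative, not verl's implementation; `init_device_mesh` is the public API of recent PyTorch releases.

```python
# Hypothetical mapping from an fsdp_size config value to a device mesh.
import torch.distributed as dist
from torch.distributed.device_mesh import init_device_mesh

def make_fsdp_mesh(fsdp_size: int):
    world_size = dist.get_world_size()
    if fsdp_size <= 0 or fsdp_size >= world_size:
        # fsdp_size == -1: a single sharding group over every rank (plain FSDP).
        return init_device_mesh("cuda", (world_size,), mesh_dim_names=("fsdp",))
    # 0 < fsdp_size < world_size would mean HSDP: replicate across
    # world_size // fsdp_size groups and shard within each group, e.g.
    #   init_device_mesh("cuda", (world_size // fsdp_size, fsdp_size),
    #                    mesh_dim_names=("ddp", "fsdp"))
    # Per the commit message, this path is on hold and asserts for now.
    assert False, "HSDP (0 < fsdp_size < world_size) is not supported yet"
```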
@@ -20,6 +20,7 @@ python3 -m verl.trainer.main_ppo \
     actor_rollout_ref.actor.fsdp_config.param_offload=False \
     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
+    actor_rollout_ref.actor.fsdp_config.fsdp_size=-1 \
     actor_rollout_ref.rollout.log_prob_micro_batch_size=128 \
     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
     actor_rollout_ref.rollout.name=vllm \
@@ -36,6 +37,7 @@ python3 -m verl.trainer.main_ppo \
     critic.model.fsdp_config.param_offload=False \
     critic.model.fsdp_config.grad_offload=False \
     critic.model.fsdp_config.optimizer_offload=False \
+    critic.model.fsdp_config.fsdp_size=-1 \
     reward_model.enable=True \
     reward_model.ulysses_sequence_parallel_size=2 \
     reward_model.model.path=Qwen/Qwen2.5-0.5B \
...
@@ -53,7 +53,6 @@ actor_rollout_ref:
       wrap_policy:
         # transformer_layer_cls_to_wrap: None
         min_num_params: 0
-      fsdp_size: -1
     log_prob_micro_batch_size: 128
     log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
     log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
@@ -129,6 +128,7 @@ reward_model:
   fsdp_config:
     min_num_params: 0
     param_offload: False
+    fsdp_size: -1
   micro_batch_size: 64
   max_length: null
   ulysses_sequence_parallel_size: 1 # sp size
...