Unverified Commit 62e23aee by Guangming Sheng Committed by GitHub

[ci] fix: delete ckpts after saving in github runner (#272)

- As titled
parent ec82e9e5
......@@ -46,6 +46,11 @@ jobs:
run: |
ray stop --force
bash tests/e2e/run_qwen_gsm8k_function_rm.sh
- name: Running gsm8k e2e without rmpad using function rm and load ckpt from previous step
run: |
ray stop --force
bash tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh
rm -rf ~/ckpt/*
- name: Running gsm8k e2e training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
run: |
ray stop --force
......@@ -54,10 +59,6 @@ jobs:
run: |
ray stop --force
bash tests/e2e/run_qwen_gsm8k_function_rm_remax.sh
- name: Running gsm8k e2e without rmpad using function rm and load ckpt from previous step
run: |
ray stop --force
bash tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh
- name: Running gsm8k e2e with rmpad using model rm
run: |
ray stop --force
......
......@@ -45,4 +45,5 @@ jobs:
- name: Running gsm8k e2e training tests with LoRA
run: |
ray stop --force
bash tests/sft/run_sft_qwen05_peft.sh 8 $HOME/ckpts/
\ No newline at end of file
bash tests/sft/run_sft_qwen05_peft.sh 8 $HOME/ckpts/
rm -rf $HOME/ckpts/*
\ No newline at end of file
......@@ -58,3 +58,4 @@ jobs:
run: |
ray stop --force
bash tests/sft/run_sft_qwen05_sp2_liger.sh 8 $HOME/ckpts/
rm -rf $HOME/ckpts/
......@@ -39,4 +39,5 @@ python3 -m verl.trainer.main_ppo \
trainer.n_gpus_per_node=8 \
trainer.nnodes=1 \
trainer.save_freq=1 \
trainer.default_local_dir=$HOME/ckpt/ \
trainer.total_training_steps=1 $@
......@@ -415,15 +415,15 @@ class ActorRolloutRefWorker(MegatronWorker):
return output
@register(dispatch_mode=Dispatch.ONE_TO_ALL)
def load_checkpoint(self, checkpoint_path):
def load_checkpoint(self, checkpoint_path, **kwargs):
pass
@register(dispatch_mode=Dispatch.ONE_TO_ALL)
def load_pretrained_model(self, checkpoint_path):
def load_pretrained_model(self, checkpoint_path, **kwargs):
pass
@register(dispatch_mode=Dispatch.ONE_TO_ALL)
def save_checkpoint(self, checkpoint_path):
def save_checkpoint(self, checkpoint_path, **kwargs):
assert self._is_actor
pass
......@@ -590,11 +590,11 @@ class CriticWorker(MegatronWorker):
return output
@register(dispatch_mode=Dispatch.ONE_TO_ALL)
def load_checkpoint(self, checkpoint_path):
def load_checkpoint(self, checkpoint_path, **kwargs):
pass
@register(dispatch_mode=Dispatch.ONE_TO_ALL)
def save_checkpoint(self, checkpoint_path):
def save_checkpoint(self, checkpoint_path, **kwargs):
pass
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment