Commit e87d0923 by Shi wenxuan

feat: new metrics

parent f2982c41
......@@ -95,7 +95,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
actor_rollout_ref.rollout.name=vllm \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.rollout.val_kwargs.n=2 \
actor_rollout_ref.rollout.val_kwargs.n=10 \
actor_rollout_ref.rollout.temperature=1.0 \
actor_rollout_ref.rollout.val_kwargs.temperature=1.0 \
actor_rollout_ref.rollout.val_kwargs.do_sample=True \
......@@ -123,7 +123,7 @@ python3 -m verl.trainer.main_ppo \
trainer.resume_mode=auto \
trainer.default_hdfs_dir=null \
trainer.save_freq=20 \
trainer.test_freq=20 \
trainer.test_freq=2 \
trainer.total_epochs=100 "${@:1}"
......
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/bin/bash
set -euxo pipefail
project_name='DAPO'
exp_name='DAPO-Early-Qwen2.5-32B'
adv_estimator=grpo
kl_coef=0.0
kl_loss_coef=0.0
clip_ratio_low=0.2
clip_ratio_high=0.28
enable_overlong_buffer=True
overlong_buffer_len=$((1024 * 1))
overlong_penalty_factor=1.0
# An early version for DAPO
enable_filter_groups=False
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
use_token_level_loss=False
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
WORKING_DIR=${WORKING_DIR:-"${PWD}"}
RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/verl/trainer/runtime_env.yaml"}
NNODES=${NNODES:-16}
# Paths
# Algorithm
## Train
max_prompt_length=$((1024 * 2))
max_response_length=$((1024 * 20))
## Validation
val_top_k=-1 # 0 for HF rollout, -1 for vLLM rollout
# Performance Related Parameter
sp_size=8
use_dynamic_bsz=True
actor_ppo_max_token_len=$((max_prompt_length + max_response_length))
infer_ppo_max_token_len=$((max_prompt_length + max_response_length))
offload=True
gen_tp=4
export VLLM_USE_V1=1
echo "$WANDB_DIR"
echo "$SAVE_DIR"
echo "$WANDB_API_KEY"
# Set default model path if not provided
MODEL_PATH="/share/collab/codemodel/models/Qwen2.5-Coder-7B-Instruct"
# Launch training; node and GPU counts come from the SLURM environment (trainer.nnodes, trainer.n_gpus_per_node).
python3 -m verl.trainer.main_ppo \
algorithm.adv_estimator=grpo \
data.train_files=/nfs_global/S/zhuyaoyu/projects/verl/data/codev/v1/1.6k/train.parquet \
data.val_files=/nfs_global/S/zhuyaoyu/projects/verl/data/codev/v1/1.6k/test.parquet \
data.train_batch_size=128 \
data.val_batch_size=512 \
data.max_prompt_length=1024 \
data.max_response_length=3072 \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
+actor_rollout_ref.model.override_config.resid_pdrop=0. \
actor_rollout_ref.model.enable_gradient_checkpointing=True \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.actor.optim.weight_decay=0.0 \
actor_rollout_ref.actor.use_dynamic_bsz=True \
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=12000 \
actor_rollout_ref.model.use_remove_padding=True \
actor_rollout_ref.actor.clip_ratio_low=${clip_ratio_low} \
actor_rollout_ref.actor.clip_ratio_high=${clip_ratio_high} \
actor_rollout_ref.actor.ppo_mini_batch_size=64 \
actor_rollout_ref.actor.use_kl_loss=True \
actor_rollout_ref.actor.kl_loss_coef=0.00 \
actor_rollout_ref.actor.kl_loss_type=low_var_kl \
actor_rollout_ref.actor.entropy_coeff=0 \
actor_rollout_ref.actor.grad_clip=0.5 \
actor_rollout_ref.actor.use_token_level_loss=${use_token_level_loss} \
actor_rollout_ref.actor.fsdp_config.param_offload=False \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=24000 \
actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
actor_rollout_ref.rollout.name=vllm \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.rollout.val_kwargs.n=10 \
actor_rollout_ref.rollout.temperature=1.0 \
actor_rollout_ref.rollout.val_kwargs.temperature=1.0 \
actor_rollout_ref.rollout.val_kwargs.do_sample=True \
actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
actor_rollout_ref.rollout.enforce_eager=False \
actor_rollout_ref.rollout.free_cache_engine=False \
reward_model.reward_manager=prime \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
custom_reward_function.overlong_buffer.enable=${enable_overlong_buffer} \
custom_reward_function.overlong_buffer.len=${overlong_buffer_len} \
custom_reward_function.overlong_buffer.penalty_factor=${overlong_penalty_factor} \
custom_reward_function.path=verl/utils/reward_score/codev.py \
custom_reward_function.name=compute_score_wrapper \
custom_reward_function.continuous_reward.enable=True \
custom_reward_function.continuous_reward.error_ratio_threshold=0.5 \
algorithm.kl_ctrl.kl_coef=0.0 \
trainer.critic_warmup=0 \
trainer.logger=['console','wandb'] \
trainer.project_name='codev' \
trainer.experiment_name='codev-7b-16k' \
trainer.n_gpus_per_node=$USER_GPUS_PER_NODE \
trainer.nnodes=$SLURM_JOB_NUM_NODES \
+trainer.val_before_train=False \
trainer.default_local_dir=$SAVE_DIR \
trainer.resume_mode=auto \
trainer.default_hdfs_dir=null \
trainer.save_freq=20 \
trainer.test_freq=2 \
trainer.total_epochs=100 "${@:1}"
# custom_reward_function.path=/nfs_global/S/zhuyaoyu/projects/dapo/verl/utils/reward_score/codev.py \
\ No newline at end of file
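For reference, clip_ratio_low/clip_ratio_high above configure DAPO's asymmetric "clip-higher" PPO objective. A minimal sketch of what those two knobs control (ignoring response masking and the token-level vs. sequence-level aggregation choices; the function name is illustrative, not verl's API):

import torch

def dapo_policy_loss(log_prob, old_log_prob, advantages, clip_low=0.2, clip_high=0.28):
    # Importance ratio between the current and the rollout (old) policy.
    ratio = torch.exp(log_prob - old_log_prob)
    # Asymmetric clipping: the upper bound 1 + clip_high is looser than the
    # lower bound 1 - clip_low, which is the DAPO "clip-higher" trick.
    clipped = torch.clamp(ratio, 1.0 - clip_low, 1.0 + clip_high)
    # Standard PPO pessimistic objective (negated for minimization).
    return torch.max(-advantages * ratio, -advantages * clipped).mean()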
WARNING: Did not unuse /usr/share/Modules/modulefiles
No Modulefiles Currently Loaded.
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 3) gcc/9.3.0
2) slurm-tools/v1.0 4) cuda-cudnn/11.8-8.8.1
/usr/bin/which: no python in (/tools/cluster-software/cuda-cudnn/cuda-11.8.0-8.8.1/bin:/tools/cluster-software/gcc/gcc-9.3.0/bin:/tools/cluster-software/slurm-tools/slurm-tools-v1.0/bin:/tools/cluster-software/cluster-tools/cluster-tools-v1.0/bin:/home/S/wuyt/.elan/bin:/home/S/wuyt/.cargo/bin:/home/S/wuyt/nfs_global/anaconda3/envs/deepscaler/bin:/home/S/wuyt/lustre/anaconda3/condabin:/home/S/wuyt/.local/bin:/home/S/wuyt/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/nfs_global/S/wuyt/.local/bin:/nfs_global/S/wuyt/wuyt/git-lfs/git-lfs-3.2.0)
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 3) gcc/9.3.0
2) slurm-tools/v1.0 4) cuda-cudnn/11.8-8.8.1
/usr/bin/which: no python in (/tools/cluster-software/cuda-cudnn/cuda-11.8.0-8.8.1/bin:/tools/cluster-software/gcc/gcc-9.3.0/bin:/tools/cluster-software/slurm-tools/slurm-tools-v1.0/bin:/tools/cluster-software/cluster-tools/cluster-tools-v1.0/bin:/home/S/wuyt/.elan/bin:/home/S/wuyt/.cargo/bin:/home/S/wuyt/nfs_global/anaconda3/envs/deepscaler/bin:/home/S/wuyt/lustre/anaconda3/condabin:/home/S/wuyt/.local/bin:/home/S/wuyt/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/nfs_global/S/wuyt/.local/bin:/nfs_global/S/wuyt/wuyt/git-lfs/git-lfs-3.2.0)
wandb: Appending key for api.wandb.ai to your netrc file: /home/S/wuyt/.netrc
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: /home/S/wuyt/.netrc
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
[2025-04-01 10:04:11,489 W 634026 634026] global_state_accessor.cc:429: Retrying to get node with node ID 2ab9de94a7185bdf0132a2fa88197f4972316281e3d52a2ad135506d
2025-04-01 10:04:20,398 INFO dashboard_sdk.py:338 -- Uploading package gcs://_ray_pkg_443717d5f8aeb41f.zip.
2025-04-01 10:04:20,398 INFO packaging.py:575 -- Creating a file package for local module '.'.
train-multigpu.sh: line 220: 728523 Terminated copy_log_and_plot
chmod: changing permissions of '../tmp': Operation not permitted
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/plasma_store': File exists
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/raylet': File exists
/var/log/atop/atop_20250401 - stat raw file: No such file or directory
This source diff could not be displayed because it is too large. You can view the blob instead.
WARNING: Did not unuse /usr/share/Modules/modulefiles
No Modulefiles Currently Loaded.
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 3) gcc/9.3.0
2) slurm-tools/v1.0 4) cuda-cudnn/11.8-8.8.1
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 3) gcc/9.3.0
2) slurm-tools/v1.0 4) cuda-cudnn/11.8-8.8.1
wandb: Appending key for api.wandb.ai to your netrc file: /home/S/wuyt/.netrc
wandb: Appending key for api.wandb.ai to your netrc file: /home/S/wuyt/.netrc
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
[2025-04-02 01:06:35,166 W 3115729 3115729] global_state_accessor.cc:429: Retrying to get node with node ID 736407c8bd15a81bfe135e9345be09f72e478a7f772f20455efb29e7
2025-04-02 01:06:45,530 INFO dashboard_sdk.py:338 -- Uploading package gcs://_ray_pkg_55d2a6863b4a199c.zip.
2025-04-02 01:06:45,531 INFO packaging.py:575 -- Creating a file package for local module '.'.
train-multigpu.sh: line 223: 595967 Terminated copy_log_and_plot
Traceback (most recent call last):
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 298, in <module>
plot_data(args.folder, no_ratio=args.no_ratio)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 282, in plot_data
plot_different_accuracy_ratio(folder)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 127, in plot_different_accuracy_ratio
df = pd.read_csv(csv_file_path)
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 1026, in read_csv
return _read(filepath_or_buffer, kwds)
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 620, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 1620, in __init__
self._engine = self._make_engine(f, self.engine)
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 1898, in _make_engine
return mapping[engine](f, **self.options)
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py", line 93, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "parsers.pyx", line 581, in pandas._libs.parsers.TextReader.__cinit__
pandas.errors.EmptyDataError: No columns to parse from file
chmod: changing permissions of '../tmp': Operation not permitted
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/plasma_store': File exists
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/raylet': File exists
/var/log/atop/atop_20250402 - stat raw file: No such file or directory
This source diff could not be displayed because it is too large. You can view the blob instead.
WARNING: Did not unuse /usr/share/Modules/modulefiles
No Modulefiles Currently Loaded.
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 3) gcc/9.3.0
2) slurm-tools/v1.0 4) cuda-cudnn/11.8-8.8.1
wandb: Appending key for api.wandb.ai to your netrc file: /home/S/wuyt/.netrc
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
2025-04-02 01:14:14,510 INFO dashboard_sdk.py:338 -- Uploading package gcs://_ray_pkg_55d2a6863b4a199c.zip.
2025-04-02 01:14:14,510 INFO packaging.py:575 -- Creating a file package for local module '.'.
Traceback (most recent call last):
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3805, in get_loc
return self._engine.get_loc(casted_key)
File "index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc
File "index.pyx", line 196, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 7081, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 7089, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'reward/correct_0%_ratio'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 298, in <module>
plot_data(args.folder, no_ratio=args.no_ratio)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 282, in plot_data
plot_different_accuracy_ratio(folder)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 131, in plot_different_accuracy_ratio
df[f'{col}_smoothed'] = smooth_data(df[col])
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/frame.py", line 4102, in __getitem__
indexer = self.columns.get_loc(key)
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3812, in get_loc
raise KeyError(key) from err
KeyError: 'reward/correct_0%_ratio'
Traceback (most recent call last):
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3805, in get_loc
return self._engine.get_loc(casted_key)
File "index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc
File "index.pyx", line 196, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 7081, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 7089, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'reward/correct_0%_ratio'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 298, in <module>
plot_data(args.folder, no_ratio=args.no_ratio)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 282, in plot_data
plot_different_accuracy_ratio(folder)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 131, in plot_different_accuracy_ratio
df[f'{col}_smoothed'] = smooth_data(df[col])
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/frame.py", line 4102, in __getitem__
indexer = self.columns.get_loc(key)
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3812, in get_loc
raise KeyError(key) from err
KeyError: 'reward/correct_0%_ratio'
train-multigpu.sh: line 223: 618003 Terminated copy_log_and_plot
Traceback (most recent call last):
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 298, in <module>
plot_data(args.folder, no_ratio=args.no_ratio)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 282, in plot_data
plot_different_accuracy_ratio(folder)
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 131, in plot_different_accuracy_ratio
df[f'{col}_smoothed'] = smooth_data(df[col])
File "/nfs_global/S/zhuyaoyu/projects/verl/plot_and_analyze/plot.py", line 44, in smooth_data
return data.rolling(window=window_size, min_periods=1).mean()
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/generic.py", line 12580, in rolling
return Rolling(
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/window/rolling.py", line 170, in __init__
self._validate()
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/window/rolling.py", line 1869, in _validate
super()._validate()
File "/workspace/S/zhuyaoyu/softwares/miniconda3/envs/verl/lib/python3.10/site-packages/pandas/core/window/rolling.py", line 181, in _validate
raise ValueError(
ValueError: min_periods 1 must be <= window 0
chmod: changing permissions of '../tmp': Operation not permitted
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/plasma_store': File exists
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/raylet': File exists
/var/log/atop/atop_20250402 - stat raw file: No such file or directory
This source diff could not be displayed because it is too large. You can view the blob instead.
WARNING: Did not unuse /usr/share/Modules/modulefiles
No Modulefiles Currently Loaded.
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 3) gcc/9.3.0
2) slurm-tools/v1.0 4) cuda-cudnn/11.8-8.8.1
Currently Loaded Modulefiles:
1) cluster-tools/v1.0 3) gcc/9.3.0
2) slurm-tools/v1.0 4) cuda-cudnn/11.8-8.8.1
wandb: Appending key for api.wandb.ai to your netrc file: /home/S/wuyt/.netrc
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: /home/S/wuyt/.netrc
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
[2025-04-02 11:51:05,525 W 3127793 3127793] global_state_accessor.cc:429: Retrying to get node with node ID b26f7a02826b437d1ff5aec846a4896c59f24a33b9b656ec3cd16f56
2025-04-02 11:51:16,404 INFO dashboard_sdk.py:338 -- Uploading package gcs://_ray_pkg_75d347a5920743dc.zip.
2025-04-02 11:51:16,404 INFO packaging.py:575 -- Creating a file package for local module '.'.
train-multigpu.sh: line 223: 701843 Terminated copy_log_and_plot
chmod: changing permissions of '../tmp': Operation not permitted
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/plasma_store': File exists
cp: cannot create special file '../tmp/ray_wuyt/session_latest/sockets/raylet': File exists
/var/log/atop/atop_20250402 - stat raw file: No such file or directory
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Currently Loaded Modulefiles:
1) git/2.31.1 2) gcc/9.3.0 3) cmake/3.21.7
Currently Loaded Modulefiles:
1) git/2.31.1 3) cmake/3.21.7 5) slurm-tools/v1.0
2) gcc/9.3.0 4) cluster-tools/v1.0 6) cuda-cudnn/12.1-8.9.3
Job start at 2025-04-04 08:51:30
Job run at:
Static hostname: localhost.localdomain
Transient hostname: r8l40-a00.ib.future.cn
Icon name: computer-server
Chassis: server
Machine ID: 5a5f22d1ca484ec4bb0c3310c788be8b
Boot ID: 870c9831f3b64f2ca8b3258b37fb8613
Operating System: Rocky Linux 8.7 (Green Obsidian)
CPE OS Name: cpe:/o:rocky:rocky:8:GA
Kernel: Linux 4.18.0-425.10.1.el8_7.x86_64
Architecture: x86-64
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/rl-root 376G 18G 358G 5% /
/dev/nvme4n1p1 3.5T 25G 3.5T 1% /local
/dev/nvme2n1p1 3.5T 29G 3.5T 1% /tmp
/dev/mapper/rl-var 512G 9.9G 502G 2% /var
/dev/nvme0n1p2 2.0G 366M 1.7G 18% /boot
/dev/nvme1n1p1 3.5T 43G 3.5T 2% /local/nfscache
/dev/nvme0n1p1 599M 5.8M 594M 1% /boot/efi
ssd.nas00.future.cn:/rocky8_home 16G 3.3G 13G 21% /home
ssd.nas00.future.cn:/rocky8_workspace 400G 239G 162G 60% /workspace
ssd.nas00.future.cn:/rocky8_tools 5.0T 75G 5.0T 2% /tools
ssd.nas00.future.cn:/centos7_home 16G 7.6G 8.5G 47% /centos7/home
ssd.nas00.future.cn:/centos7_workspace 400G 5.2G 395G 2% /centos7/workspace
ssd.nas00.future.cn:/centos7_tools 5.0T 235G 4.8T 5% /centos7/tools
ssd.nas00.future.cn:/eda-tools 8.0T 5.7T 2.4T 72% /centos7/eda-tools
hdd.nas00.future.cn:/share_personal 500G 414M 500G 1% /share/personal
zone05.nas01.future.cn:/NAS_HPC_collab_codemodel 34T 33T 858G 98% /share/collab/codemodel
ext-zone00.nas02.future.cn:/nfs_global 289T 276T 14T 96% /nfs_global
ssd.nas00.future.cn:/common_datasets 75T 63T 13T 84% /datasets
192.168.12.10@o2ib:192.168.12.11@o2ib:/lustre 1.9P 54T 1.7P 4% /lustre
beegfs_nodev 70T 15T 56T 21% /fast
Have already added /tools/cluster-modulefiles into $MODULEPATH
/tools/cluster-software/gcc/gcc-9.3.0/bin/gcc
/workspace/S/zhuyaoyu/softwares/miniconda3/bin/python
/workspace/S/zhuyaoyu/softwares/miniconda3/bin/python3
############### /home : /home/S/zhuyaoyu
Disk quotas for user zhuyaoyu (uid 6207):
Filesystem space quota limit grace files quota limit grace
/home 3353M 16384M 20480M 90671 0 0
############### /workspace
Disk quotas for user zhuyaoyu (uid 6207):
Filesystem space quota limit grace files quota limit grace
/workspace 239G 400G 500G 799k 0 0
############### /nfs_global
Disk quotas for user zhuyaoyu (uid 6207):
Filesystem space quota limit grace files quota limit grace
/nfs_global 2410G 5120G 7168G 2069k 5000k 10000k
############### /lustre
Disk quotas for usr zhuyaoyu (uid 6207):
Filesystem used quota limit grace files quota limit grace
/lustre 0k 8T 10T - 0 3000000 36000000 -
uid 6207 is using default block quota setting
uid 6207 is using default file quota setting
name, driver_version, power.limit [W]
NVIDIA L40, 550.54.15, 275.00 W
NVIDIA L40, 550.54.15, 275.00 W
NVIDIA L40, 550.54.15, 275.00 W
NVIDIA L40, 550.54.15, 275.00 W
NVIDIA L40, 550.54.15, 275.00 W
NVIDIA L40, 550.54.15, 275.00 W
NVIDIA L40, 550.54.15, 275.00 W
NVIDIA L40, 550.54.15, 275.00 W
Using GPU(s) 0,1,2,3,4,5,6,7
This job is assigned the following resources by SLURM:
CPU_IDs=0-31,56-87 GRES=gpu:8(IDX:0-7)
Have already added /tools/cluster-modulefiles into $MODULEPATH
Got device mesh tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
dtype=torch.int32), mesh_dim_names ('fsdp',)
Processing model shards with 16 (16,) in total
Writing to local disk
Saving model to ckpt/codev_distill_16k_vllm1_v2/global_step_120/actor/huggingface
Got device mesh tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
dtype=torch.int32), mesh_dim_names ('fsdp',)
Processing model shards with 16 (16,) in total
Writing to local disk
Saving model to ckpt/codev_distill_16k_vllm1_v2/global_step_140/actor/huggingface
Job end at 2025-04-04 08:53:38
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -116,7 +116,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Submit a Slurm job with specified parameters.")
# Add command-line arguments
parser.add_argument("--node_count", type=int, default=1, help="Number of nodes required.")
parser.add_argument("--node_count", type=int, default=2, help="Number of nodes required.")
parser.add_argument("--gpus_per_node", type=int, default=8, help="Number of GPUs per node (4 or 8).")
parser.add_argument("--node_type", type=str, default="r8l40", help="Node type (r8l40/r8l40s/r8a100).")
parser.add_argument("--partition", type=str, default=None, help="Partition name. (r8nv-gpu-dedicated needs to be specified)")
......
......@@ -77,13 +77,13 @@ echo "Main program continues to run. Monitoring information will be exported aft
# source xxxxx/activate
# source "/workspace/S/zhuyaoyu/softwares/miniconda3/etc/profile.d/conda.sh"
source activate /workspace/S/zhuyaoyu/softwares/miniconda3/envs/dapo/
source /workspace/S/zhuyaoyu/softwares/miniconda3/bin/activate /workspace/S/zhuyaoyu/softwares/miniconda3/envs/dapo/
export PATH="/workspace/S/zhuyaoyu/softwares/miniconda3/envs/dapo/:$PATH"
which python
# wandb login your_api_key!!!!
export WANDB_API_KEY='0a72cf472255879d3bad4939d3b39506e4a8573b'
export WANDB_API_KEY='e8f26cb646aea4a12ef982270212804afa4fa31e'
wandb login $WANDB_API_KEY
export WANDB_MODE=offline
......@@ -160,16 +160,12 @@ ray status
# and submitting on other nodes will cause network errors
if [ "$SLURM_PROCID" -eq 0 ]; then
ray list nodes
SCRIPT_TO_RUN="$CURR_DIR/recipe/dapo/run_dapo_codev_7b_16k_err_l1.0_continuous_reward.sh"
export SAVE_DIR="$CURR_DIR/results/run_dapo_codev_7b_16k_continuous_reward_0.0"
SCRIPT_TO_RUN="$CURR_DIR/recipe/dapo/dapo_7b_test.sh"
export SAVE_DIR="$CURR_DIR/results/dapo_7b_test"
# SCRIPT_TO_RUN=recipe/dapo/run_dapo_codev_7b_20k_err_l0.2_r1_continuous_reward.sh
# export SAVE_DIR="$CURR_DIR/results/run_dapo_codev_7b_20k_continuous_reward"
# SCRIPT_TO_RUN=recipe/dapo/dapo_7b_test.sh
# export SAVE_DIR="$CURR_DIR/results/dapo_7b_test"
mkdir -p $SAVE_DIR
chmod 777 $SAVE_DIR
cp $SCRIPT_TO_RUN $SAVE_DIR
......
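The submission command itself is elided from this hunk; the dashboard_sdk.py lines in the logs above indicate the selected script is submitted as a Ray job against RAY_ADDRESS. A minimal sketch of that step using Ray's Python job-submission client (the wrapper may instead use the `ray job submit` CLI; the entrypoint and runtime_env shown here are illustrative):

from ray.job_submission import JobSubmissionClient

client = JobSubmissionClient("http://localhost:8265")   # RAY_ADDRESS
job_id = client.submit_job(
    entrypoint="bash recipe/dapo/dapo_7b_test.sh",       # SCRIPT_TO_RUN chosen above
    runtime_env={"working_dir": "."},                    # mirrors WORKING_DIR / runtime_env.yaml
)
print("Submitted Ray job:", job_id)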
......@@ -20,13 +20,13 @@
###SBATCH --cpus-per-task=48 # Request Q core per task; means that P*Q cores per node
# SBATCH
#SBATCH -p r8nv-gpu-dedicated # Submit to 'r8nv-gpu-hw' Partitiion
#SBATCH -t 7-00:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH -p r8nv-gpu-dist # Submit to 'r8nv-gpu-hw' Partitiion
#SBATCH -t 1-06:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=2 # Request N nodes
#SBATCH --gres=gpu:8 # Request M GPU per node
#SBATCH --qos=normal # Request QOS Type
#SBATCH --constraint=A100_80G
#SBATCH --exclude=r8a100-d[07]
#SBATCH --qos=gpu-normal # Request QOS Type
##SBATCH --constraint="L40"|"L40S"
#SBATCH --nodelist=r8l40-a[00,01]
#==========================================================================#
......
......@@ -20,13 +20,13 @@
###SBATCH --cpus-per-task=48 # Request Q core per task; means that P*Q cores per node
# SBATCH
#SBATCH -p r8nv-gpu-hw # Submit to 'r8nv-gpu-hw' Partitiion
#SBATCH -p r8nv-gpu-dist # Submit to 'r8nv-gpu-hw' Partitiion
#SBATCH -t 1-05:59:59 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --nodes=2 # Request N nodes
#SBATCH --gres=gpu:8 # Request M GPU per node
#SBATCH --qos=gpu-normal # Request QOS Type
#SBATCH --nodelist=r8l40-a01
#SBATCH --exclude=r8a100-d[07]
##SBATCH --constraint=A100_80G
#SBATCH --nodelist=r8l40-a[04-05]
#==========================================================================#
......
......@@ -20,6 +20,7 @@ from typing import Any, Dict, List, Callable
import numpy as np
from verl import DataProto
from collections import Counter, defaultdict
from transformers import AutoTokenizer
def reduce_metrics(metrics: Dict[str, List[Any]]) -> Dict[str, Any]:
......@@ -262,6 +263,124 @@ def compute_reward_metrics(batch):
    return reward_metrics


def compute_reflection_metrics(batch: DataProto, tokenizer: AutoTokenizer) -> Dict[str, Any]:
    reflection_words = [
        "verify", "check", "confirm", "however", "reflect", "wait",
        "correct", "revise", "adjust", "re-evaluate", "re-examine", "yet"
    ]
    # print("batch.non_tensor_batch: ", {key: value for key, value in batch.non_tensor_batch.items()})
    # print("batch.batch: ", {key: value for key, value in batch.batch.items()})
    response_text = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch.batch['responses']]
    # print("response_text: ", response_text)
    response_info = _compute_response_info(batch)
    response_length = response_info['response_length']
    reward_tensor = batch.batch['token_level_scores'].sum(-1)

    is_codev = batch.non_tensor_batch.get('data_source', [''])[0] == 'codev'
    is_deepscaler = batch.non_tensor_batch.get('data_source', [''])[0] == 'deepscaler'
    if is_codev or is_deepscaler:
        reward_correct = 1.0
        correct = (reward_tensor == reward_correct).float()
    else:
        correct = (reward_tensor > 0).float()

    metrics = {}
    contains_any_reflection = np.zeros(len(response_text), dtype=bool)
    word_presence = {word: np.zeros(len(response_text), dtype=bool) for word in reflection_words}
    word_counts = {word: 0 for word in reflection_words}
    total_words = 0

    for i, text in enumerate(response_text):
        text_lower = text.lower()
        for word in reflection_words:
            count = text_lower.count(word)
            if count > 0:
                word_presence[word][i] = True
                contains_any_reflection[i] = True
                word_counts[word] += count
                total_words += count

    metrics["reflection/any_word_frequency"] = total_words
    # metrics["reflection/any_word_ratio"] = float(np.mean(contains_any_reflection))
    if np.any(contains_any_reflection):
        metrics["reflection/with_length_mean"] = float(np.mean(response_length[contains_any_reflection].cpu().numpy()))
        metrics["reflection/without_length_mean"] = float(np.mean(response_length[~contains_any_reflection].cpu().numpy()))
    if np.any(contains_any_reflection):
        metrics["reflection/with_correct_ratio"] = float(np.mean(correct[contains_any_reflection].cpu().numpy()))
        metrics["reflection/without_correct_ratio"] = float(np.mean(correct[~contains_any_reflection].cpu().numpy()))
    if np.any(contains_any_reflection):
        metrics["reflection/with_reward_mean"] = float(np.mean(reward_tensor[contains_any_reflection].cpu().numpy()))
        metrics["reflection/without_reward_mean"] = float(np.mean(reward_tensor[~contains_any_reflection].cpu().numpy()))

    for word in reflection_words:
        # metrics[f"reflection/word_{word}_ratio"] = float(np.mean(word_presence[word]))
        metrics[f"reflection_{word}/word_{word}_frequency"] = word_counts[word]
        metrics[f"reflection_{word}/with_{word}_length_mean"] = float(np.mean(response_length[word_presence[word]].cpu().numpy()))
        metrics[f"reflection_{word}/without_{word}_length_mean"] = float(np.mean(response_length[~word_presence[word]].cpu().numpy()))
        metrics[f"reflection_{word}/with_{word}_correct_ratio"] = float(np.mean(correct[word_presence[word]].cpu().numpy()))
        metrics[f"reflection_{word}/without_{word}_correct_ratio"] = float(np.mean(correct[~word_presence[word]].cpu().numpy()))
        metrics[f"reflection_{word}/with_{word}_reward_mean"] = float(np.mean(reward_tensor[word_presence[word]].cpu().numpy()))
        metrics[f"reflection_{word}/without_{word}_reward_mean"] = float(np.mean(reward_tensor[~word_presence[word]].cpu().numpy()))

    return metrics


def compute_language_mix_metrics(batch: DataProto, tokenizer: AutoTokenizer) -> Dict[str, Any]:
    response_text = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch.batch['responses']]
    reward_tensor = batch.batch['token_level_scores'].sum(-1)
    response_info = _compute_response_info(batch)
    response_length = response_info['response_length']

    # Determine whether this batch comes from a specific task type
    is_codev = 'is_codev' in batch.batch and batch.batch['is_codev']
    is_deepscaler = 'is_deepscaler' in batch.batch and batch.batch['is_deepscaler']
    if is_codev or is_deepscaler:
        reward_correct = 1.0
        correct = (reward_tensor == reward_correct).float()
    else:
        correct = (reward_tensor > 0).float()

    metrics = {}
    # Detect responses that mix Chinese and English
    contains_mix = np.zeros(len(response_text), dtype=bool)
    total_mix_count = 0
    for i, text in enumerate(response_text):
        # Count Chinese characters
        chinese_count = sum(1 for char in text if '\u4e00' <= char <= '\u9fff')
        # Count English letters
        english_count = sum(1 for char in text if ('a' <= char.lower() <= 'z'))
        if chinese_count > 0 and english_count > 0:
            contains_mix[i] = True
            total_mix_count += 1

    metrics["language_mix/frequency"] = total_mix_count
    metrics["language_mix/ratio"] = float(np.mean(contains_mix))
    if np.any(contains_mix):
        metrics["language_mix/with_length_mean"] = float(np.mean(response_length[contains_mix].cpu().numpy()))
        metrics["language_mix/without_length_mean"] = float(np.mean(response_length[~contains_mix].cpu().numpy()))
        metrics["language_mix/with_correct_ratio"] = float(np.mean(correct[contains_mix].cpu().numpy()))
        metrics["language_mix/without_correct_ratio"] = float(np.mean(correct[~contains_mix].cpu().numpy()))
        metrics["language_mix/with_reward_mean"] = float(np.mean(reward_tensor[contains_mix].cpu().numpy()))
        metrics["language_mix/without_reward_mean"] = float(np.mean(reward_tensor[~contains_mix].cpu().numpy()))

    return metrics


def bootstrap_metric(data: list[dict[str, Any]],
                     subset_size: int,
......
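The two helpers added above reduce to simple substring and Unicode-range checks per decoded response. A minimal standalone sketch of that logic on plain strings (illustrative only; the real functions operate on a DataProto batch and also aggregate lengths, rewards, and correctness):

reflection_words = ["verify", "check", "confirm", "however", "reflect", "wait"]

def has_reflection(text: str) -> bool:
    # Case-insensitive substring test, as in compute_reflection_metrics.
    lower = text.lower()
    return any(word in lower for word in reflection_words)

def is_language_mix(text: str) -> bool:
    # Mixed-language test, as in compute_language_mix_metrics: at least one
    # CJK character (U+4E00-U+9FFF) and at least one ASCII letter.
    has_chinese = any('\u4e00' <= ch <= '\u9fff' for ch in text)
    has_english = any('a' <= ch.lower() <= 'z' for ch in text)
    return has_chinese and has_english

print(has_reflection("Wait, let me re-check the port widths."))  # True
print(is_language_mix("module adder; // 两位加法器"))            # True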
......@@ -39,7 +39,7 @@ from verl.single_controller.ray import RayResourcePool, RayWorkerGroup, RayClass
from verl.single_controller.ray.base import create_colocated_worker_cls
from verl.trainer.ppo import core_algos
# from verl.trainer.ppo.metric_utils import compute_data_metrics, compute_throughout_metrics, compute_timing_metrics, reduce_metrics, bootstrap_metric, calc_maj_val
from verl.trainer.ppo.metric_utils import compute_data_metrics, compute_throughout_metrics, compute_timing_metrics, reduce_metrics, compute_reward_metrics, bootstrap_metric, calc_maj_val
from verl.trainer.ppo.metric_utils import compute_data_metrics, compute_throughout_metrics, compute_timing_metrics, reduce_metrics, compute_reward_metrics, compute_reflection_metrics, compute_language_mix_metrics, bootstrap_metric, calc_maj_val
from verl.utils.seqlen_balancing import get_seqlen_balanced_partitions, log_seqlen_unbalance
from verl.utils.checkpoint.checkpoint_manager import find_latest_ckpt_path
from verl.utils.dataset.rl_dataset import RLHFDataset, collate_fn
......@@ -610,6 +610,8 @@ class RayPPOTrainer(object):
            for metric_name, metric_vals in reward_extra_infos_dict.items():
                var2vals[metric_name].append(metric_vals[sample_idx])

        data_src_pass_k_values = defaultdict(lambda: defaultdict(list))
        data_src2prompt2var2metric = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
        for data_source, prompt2var2vals in data_src2prompt2var2vals.items():
            for prompt, var2vals in prompt2var2vals.items():
......@@ -646,6 +648,48 @@ class RayPPOTrainer(object):
metric[f"maj@{n}/mean"], metric[f"maj@{n}/std"] = maj_n_mean, maj_n_std
data_src2prompt2var2metric[data_source][prompt][var_name] = metric
# 在这里添加pass@k指标,单独计算和存储
final_rewards = var2vals["final_reward"]
pass_metric = {}
bootstraped_pass_metric = {}
# 创建通过/不通过的二元数组 (score=1表示通过)
passes = [1 if reward == 1.0 else 0 for reward in final_rewards]
n = len(passes)
c = sum(passes)
# 计算常见的k值的pass@k
ks = [1, 5, 10]
from scipy.special import comb
for k in ks:
if k <= n: # 确保k不超过样本总数
if c == 0:
pass_at_k = 0.0
elif c == n:
pass_at_k = 1.0
elif k > n -c:
pass_at_k = 1.0
else:
pass_at_k = 1.0 - (comb(n-c, k, exact=True) / comb(n, k, exact=True))
pass_metric[f"pass@{k}"] = pass_at_k
data_src2prompt2var2metric[data_source][prompt]["pass"] = pass_metric
# # 使用bootstrap方法估计pass@k的分布
# for k in ks:
# if k <= n_resps:
# bootstrap_samples = 1000
# pass_k_results = []
# for _ in range(bootstrap_samples):
# # 有放回地随机采样k个输出
# sampled_indices = np.random.choice(len(passes), size=k, replace=True)
# sampled_passes = [passes[i] for i in sampled_indices]
# # 如果至少有一个通过,则视为成功
# success = 1 if any(sampled_passes) else 0
# pass_k_results.append(success)
# bootstraped_pass_metric[f"bootstrapped_pass@{k}"] = np.mean(pass_k_results)
# data_src2prompt2var2metric[data_source][prompt]["pass_bootstrapped"] = bootstraped_pass_metric
data_src2var2metric2prompt_vals = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
for data_source, prompt2var2metric in data_src2prompt2var2metric.items():
......@@ -662,6 +706,7 @@ class RayPPOTrainer(object):
                metric_dict[pfx] = np.mean(prompt_vals)

        val_metric_dict = {f"val/{key}": value for key, value in metric_dict.items()}
        # print(f'val_metric_dict: {val_metric_dict}')
        return val_metric_dict

    def init_workers(self):
......@@ -1082,6 +1127,8 @@ class RayPPOTrainer(object):
                n_gpus = self.resource_pool_manager.get_n_gpus()
                metrics.update(compute_throughout_metrics(batch=batch, timing_raw=timing_raw, n_gpus=n_gpus))
                metrics.update(compute_reward_metrics(batch=batch))
                metrics.update(compute_reflection_metrics(batch=batch, tokenizer=self.tokenizer))
                metrics.update(compute_language_mix_metrics(batch=batch, tokenizer=self.tokenizer))

                timing_raw = defaultdict(float)  # clear timing

                metrics["train/num_gen_batches"] = num_gen_batches
......
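The pass@k metric recorded above uses the standard unbiased estimator 1 - C(n-c, k) / C(n, k), where n is the number of validation samples per prompt and c the number of correct ones; since it is only recorded when k <= n, raising rollout.val_kwargs.n from 2 to 10 in the first hunk is what makes pass@5 and pass@10 computable. A small standalone sketch of the same estimator (scipy is already imported in the added code):

from scipy.special import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    # Probability that at least one of k samples drawn without replacement
    # from n total samples (c of them correct) is correct.
    if c == 0:
        return 0.0
    if k > n - c:  # more draws than incorrect samples: a correct one is guaranteed
        return 1.0
    return 1.0 - comb(n - c, k, exact=True) / comb(n, k, exact=True)

# Example with n=10 validation samples and c=3 correct ones:
print(pass_at_k(10, 3, 1))   # 0.3
print(pass_at_k(10, 3, 5))   # 1 - C(7,5)/C(10,5) = 1 - 21/252 ≈ 0.9167
print(pass_at_k(10, 3, 10))  # 1.0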