Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
verl
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ZhangXiaoyun
verl
Commits
b6cd691b
Commit
b6cd691b
authored
May 12, 2025
by
Yaoyu Zhu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix config problems and add timing plot
parent
11cc0595
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
202 additions
and
66 deletions
+202
-66
plot_and_analyze/plot_timing.py
+198
-62
recipe/dapo/run_dapo_codev_7b_14k.sh
+1
-1
recipe/dapo/run_dapo_codev_7b_16k.sh
+1
-1
recipe/dapo/run_dapo_codev_7b_3.1k_accelerate.sh
+1
-1
recipe/dapo/run_dapo_codev_7b_3.1k_base.sh
+1
-1
No files found.
plot_and_analyze/plot_timing.py
View file @
b6cd691b
import
pandas
as
pd
from
pathlib
import
Path
import
matplotlib.pyplot
as
plt
import
argparse
from
pathlib
import
Path
import
os
def _mean_by_step(df, source_col, value_name):
    """Return the per-step mean of ``source_col`` renamed to ``value_name``.

    Returns ``None`` when ``source_col`` is not a column of ``df``; otherwise a
    DataFrame with columns ``step`` and ``value_name``. Rows with a missing
    value in either column are dropped before averaging.
    """
    if source_col not in df.columns:
        return None
    frame = df[['step', source_col]].rename(columns={source_col: value_name}).dropna()
    return frame.groupby('step')[value_name].mean().reset_index()


def process_folder(folder):
    """Load ``<folder>/stats.csv`` and return per-step averages.

    Every column that starts with ``timing_s/`` and contains ``step`` is
    treated as a step-time metric. When a ``timing_s/testing`` column exists it
    is subtracted from each step-time column, so the result excludes the time
    spent in testing.

    Args:
        folder: Directory expected to contain ``stats.csv``.

    Returns:
        A ``(step_avg, gen_avg, response_avg)`` tuple:

        * ``step_avg`` -- DataFrame with columns ``step`` and ``time`` (mean of
          all adjusted step-time columns per step).
        * ``gen_avg`` -- DataFrame with columns ``step`` and ``time`` taken from
          ``timing_s/gen``, or ``None`` when that column is absent.
        * ``response_avg`` -- DataFrame with columns ``step`` and ``length``
          taken from ``response_length/mean``, or ``None`` when absent.

        ``(None, None, None)`` is returned when the CSV file does not exist,
        has no ``step`` column, or has no step-time column.
    """
    csv_path = Path(folder) / 'stats.csv'
    if not csv_path.exists():
        return None, None, None

    df = pd.read_csv(csv_path)

    step_cols = [
        col for col in df.columns
        if col.startswith('timing_s/') and 'step' in col
    ]
    # Without a 'step' index column or any step-time metric there is nothing
    # to aggregate; report it instead of failing with a KeyError further down.
    if 'step' not in df.columns or not step_cols:
        print(f"警告:文件夹 {folder} 无有效step列,跳过处理")
        return None, None, None

    testing_col = 'timing_s/testing'
    # Coerce once, outside the loop: the testing column is the same for every
    # step-time column.
    testing = (
        pd.to_numeric(df[testing_col], errors='coerce')
        if testing_col in df.columns
        else None
    )

    adjusted_step_cols = []
    for step_col in step_cols:
        # Always coerce so that non-numeric cells become NaN (and are dropped
        # below) whether or not a testing column is present.
        df[step_col] = pd.to_numeric(df[step_col], errors='coerce')
        if testing is None:
            adjusted_step_cols.append(step_col)
            continue
        adjusted_col = f'{step_col}_adjusted'
        # fill_value=0: a value missing on one side counts as 0; the result is
        # NaN only when both sides are missing.
        df[adjusted_col] = df[step_col].sub(testing, fill_value=0)
        adjusted_step_cols.append(adjusted_col)

    # Long format (step, metric, time), then average all metrics per step.
    df_step = df[['step'] + adjusted_step_cols].melt(
        id_vars='step', var_name='metric', value_name='time'
    )
    df_step = df_step.dropna(subset=['time'])
    df_step_avg = df_step.groupby('step')['time'].mean().reset_index()

    df_gen_avg = _mean_by_step(df, 'timing_s/gen', 'time')
    df_response_avg = _mean_by_step(df, 'response_length/mean', 'length')

    return df_step_avg, df_gen_avg, df_response_avg
def _format_speedup(value):
    """Render a speedup ratio with two decimals, or ``'N/A'`` when it is missing."""
    return 'N/A' if value is None else f'{value:.2f}'


def _speedup_by_phase(adaptive, baseline, column, split_step):
    """Return baseline/adaptive ratios of the mean of ``column`` per phase.

    The result maps ``'before'`` (``step <= split_step``) and ``'after'``
    (``step > split_step``) to ``baseline_mean / adaptive_mean``. A phase is
    omitted when either frame is ``None`` or has no rows in that phase.
    """
    ratios = {}
    if adaptive is None or baseline is None or adaptive.empty or baseline.empty:
        return ratios
    phases = (
        ('before', lambda frame: frame[frame['step'] <= split_step]),
        ('after', lambda frame: frame[frame['step'] > split_step]),
    )
    for key, select in phases:
        adaptive_part = select(adaptive)
        baseline_part = select(baseline)
        if adaptive_part.empty or baseline_part.empty:
            continue
        ratios[key] = baseline_part[column].mean() / adaptive_part[column].mean()
    return ratios


def _gen_time_per_token(gen_avg, response_avg, responses_per_step):
    """Join generation time with response length and add ``gen_time_per_token``.

    Returns ``None`` when either input is ``None`` or empty. Otherwise returns
    the inner join on ``step`` with an extra column
    ``time / length / responses_per_step``.
    """
    if gen_avg is None or response_avg is None:
        return None
    if gen_avg.empty or response_avg.empty:
        return None
    merged = pd.merge(gen_avg, response_avg, on='step', how='inner')
    merged['gen_time_per_token'] = (
        merged['time'] / merged['length'] / responses_per_step
    )
    return merged


def _print_speedup_report(speedup, split_step):
    """Print the before/after speedup ratios for each of the three metrics."""
    sections = (
        ("Step时间加速比:", 'step'),
        ("\nGen时间加速比:", 'gen'),
        ("\nGen per Token加速比:", 'gen_per_token'),
    )
    print("===== 加速比统计 =====")
    for heading, key in sections:
        print(heading)
        print(f"{split_step}轮之前 (Baseline / Adaptive): "
              f"{_format_speedup(speedup[key].get('before'))}")
        print(f"{split_step}轮之后 (Baseline / Adaptive): "
              f"{_format_speedup(speedup[key].get('after'))}")
    print("======================\n")


def _draw_panel(ax, title, ylabel, curves, value_col):
    """Draw one subplot: a line per ``(color, label, frame)`` with legend and grid.

    Entries whose frame is ``None`` or empty are skipped.
    """
    ax.set_title(title, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=10)
    for color, label, frame in curves:
        if frame is None or frame.empty:
            continue
        ax.plot(frame['step'], frame[value_col],
                linestyle='-', linewidth=2, color=color, label=label)
    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, linestyle='--', alpha=0.7)


def plot_combined_comparison(folders, labels, save_name, title, split_step=150,
                             responses_per_step=128 * 16,
                             save_dir='results/figures'):
    """Plot step time, generation time and generation time per token.

    Each folder is loaded with ``process_folder``; folders without usable step
    data are skipped with a warning. When at least two folders remain, the
    first is treated as the adaptive run and the second as the baseline, and
    baseline/adaptive speedup ratios are printed for the steps up to and after
    ``split_step``. The figure (three stacked subplots sharing the x axis) is
    written to ``<save_dir>/<save_name>`` and then closed.

    Args:
        folders: Result directories, each expected to contain ``stats.csv``.
        labels: Legend labels, paired positionally with ``folders``.
        save_name: File name of the saved figure.
        title: Figure-level title.
        split_step: Step that separates the "before" and "after" phases of the
            speedup report.
        responses_per_step: Divisor applied to ``time / length`` for the
            per-token metric. NOTE(review): presumably the number of responses
            generated per step (batch size x rollouts) -- confirm against the
            training configuration.
        save_dir: Directory the figure is written to; created if missing.
    """
    all_data = []
    for folder, label in zip(folders, labels):
        step_avg, gen_avg, response_avg = process_folder(folder)
        if step_avg is None or step_avg.empty:
            print(f"警告:{label} 无有效step数据,跳过")
            continue
        per_token = _gen_time_per_token(gen_avg, response_avg, responses_per_step)
        all_data.append((label, step_avg, gen_avg, per_token))

    if not all_data:
        print("Warning: no folder produced usable step timings; nothing to plot")
        return

    # The speedup report needs an (adaptive, baseline) pair; with a single
    # usable folder only the curves are drawn.
    if len(all_data) >= 2:
        _, adapt_step, adapt_gen, adapt_token = all_data[0]
        _, base_step, base_gen, base_token = all_data[1]
        # Ratio is baseline / adaptive, so a value above 1 means the adaptive
        # run is faster.
        speedup = {
            'step': _speedup_by_phase(adapt_step, base_step, 'time', split_step),
            'gen': _speedup_by_phase(adapt_gen, base_gen, 'time', split_step),
            'gen_per_token': _speedup_by_phase(
                adapt_token, base_token, 'gen_time_per_token', split_step),
        }
        _print_speedup_report(speedup, split_step)

    fig, axes = plt.subplots(3, 1, figsize=(12, 10), sharex=True)
    plt.subplots_adjust(hspace=0.2, top=0.9)

    color_map = plt.colormaps.get_cmap('tab10')
    colors = [color_map(i) for i in range(len(all_data))]

    _draw_panel(
        axes[0], 'RL Training Time per step', 'Time (seconds)',
        [(colors[i], entry[0], entry[1]) for i, entry in enumerate(all_data)],
        'time',
    )
    _draw_panel(
        axes[1], 'Generation Time', 'Time (seconds)',
        [(colors[i], entry[0], entry[2]) for i, entry in enumerate(all_data)],
        'time',
    )
    _draw_panel(
        axes[2], 'Generation Time per Used Token', 'Time per Token (seconds)',
        [(colors[i], entry[0], entry[3]) for i, entry in enumerate(all_data)],
        'gen_time_per_token',
    )
    axes[2].set_xlabel('Step', fontsize=10)

    fig.suptitle(title, fontsize=14, y=0.95)

    out_dir = Path(save_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_dir / save_name, bbox_inches='tight')
    plt.close(fig)


def plot_timing_comparison(folders, labels):
    """Backward-compatible entry point; delegates to ``plot_combined_comparison``.

    Kept so existing callers of the old name keep working. It now draws the
    three-panel figure (not the old dual-axis one) and saves it as
    ``timing_comparison.png`` under the default output directory.
    """
    plot_combined_comparison(
        folders,
        labels,
        save_name='timing_comparison.png',
        title='Timing and Response Length Comparison Between Accelerated and Non-Accelerated Versions',
    )
if
__name__
==
'__main__'
:
...
...
@@ -77,7 +210,10 @@ if __name__ == '__main__':
'results/codev_3.1k_dapo_accelerate'
,
'results/codev_3.1k_dapo_no_accelerate'
]
labels
=
[
'Accelerated'
,
'Non-Accelerated'
]
# 对应每个文件夹的标签
plot_timing_comparison
(
folders
,
labels
)
\ No newline at end of file
labels
=
[
'Adaptive DAPO'
,
'Baseline DAPO'
]
plot_combined_comparison
(
folders
=
folders
,
labels
=
labels
,
save_name
=
'adaptive_dapo_timing_comparison.png'
,
title
=
'Acceleration Result of Adaptive DAPO'
)
\ No newline at end of file
recipe/dapo/run_dapo_codev_7b_14k.sh
View file @
b6cd691b
...
...
@@ -118,7 +118,7 @@ python3 -m verl.trainer.main_ppo \
trainer.critic_warmup
=
0
\
trainer.logger
=[
'console'
,
'wandb'
]
\
trainer.project_name
=
'codev'
\
trainer.experiment_name
=
'codev-7b-
3.1
kdata'
\
trainer.experiment_name
=
'codev-7b-
14
kdata'
\
trainer.n_gpus_per_node
=
$USER_GPUS_PER_NODE
\
trainer.nnodes
=
$SLURM_JOB_NUM_NODES
\
+trainer.val_before_train
=
False
\
...
...
recipe/dapo/run_dapo_codev_7b_16k.sh
View file @
b6cd691b
...
...
@@ -118,7 +118,7 @@ python3 -m verl.trainer.main_ppo \
trainer.critic_warmup
=
0
\
trainer.logger
=[
'console'
,
'wandb'
]
\
trainer.project_name
=
'codev'
\
trainer.experiment_name
=
'codev-7b-
3.1
kdata'
\
trainer.experiment_name
=
'codev-7b-
16
kdata'
\
trainer.n_gpus_per_node
=
$USER_GPUS_PER_NODE
\
trainer.nnodes
=
$SLURM_JOB_NUM_NODES
\
+trainer.val_before_train
=
False
\
...
...
recipe/dapo/run_dapo_codev_7b_3.1k_accelerate.sh
View file @
b6cd691b
...
...
@@ -125,7 +125,7 @@ python3 -m verl.trainer.main_ppo \
trainer.default_local_dir
=
$SAVE_DIR
\
trainer.resume_mode
=
auto
\
trainer.default_hdfs_dir
=
null
\
trainer.save_freq
=
4
0
\
trainer.save_freq
=
2
0
\
trainer.test_freq
=
20
\
trainer.total_epochs
=
100
"
${
@
:1
}
"
...
...
recipe/dapo/run_dapo_codev_7b_3.1k_base.sh
View file @
b6cd691b
...
...
@@ -126,7 +126,7 @@ python3 -m verl.trainer.main_ppo \
trainer.default_local_dir
=
$SAVE_DIR
\
trainer.resume_mode
=
auto
\
trainer.default_hdfs_dir
=
null
\
trainer.save_freq
=
4
0
\
trainer.save_freq
=
2
0
\
trainer.test_freq
=
20
\
trainer.total_epochs
=
100
"
${
@
:1
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment