Commit b6cd691b by Yaoyu Zhu

fix config problems and add timing plot

parent 11cc0595
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import argparse
from pathlib import Path
import os
def process_folder(folder):
    """Load ``stats.csv`` from *folder* and aggregate per-step timing data.

    Supports multiple ``timing_s/*step*`` columns; when a
    ``timing_s/testing`` column is present, its value is subtracted from
    each step column so the reported step time excludes evaluation time.

    Args:
        folder: path of a results folder expected to contain ``stats.csv``.

    Returns:
        Tuple ``(step_avg, gen_avg, response_avg)`` of DataFrames, each with
        a ``step`` column plus a value column (``time`` or ``length``)
        averaged per step. ``(None, None, None)`` when the CSV is missing or
        no step column is found; ``gen_avg`` / ``response_avg`` are ``None``
        individually when their source columns are absent.
    """
    csv_path = Path(folder) / 'stats.csv'
    if not csv_path.exists():
        return None, None, None
    df = pd.read_csv(csv_path)

    # Collect timing columns; keep only the per-step ones.
    timing_cols = [col for col in df.columns if col.startswith('timing_s/')]
    step_cols = [col for col in timing_cols if 'step' in col]
    testing_col = 'timing_s/testing' if 'timing_s/testing' in df.columns else None

    # Subtract testing time from each step column when available; otherwise
    # use the raw step column as-is.
    adjusted_step_cols = []
    for step_col in step_cols:
        if testing_col and testing_col in df.columns:
            df[step_col] = pd.to_numeric(df[step_col], errors='coerce')
            df[testing_col] = pd.to_numeric(df[testing_col], errors='coerce')
            adjusted_col = f'{step_col}_adjusted'
            # fill_value=0 treats missing testing time as zero so the step
            # value is preserved on rows without an eval measurement.
            df[adjusted_col] = df[step_col].sub(df[testing_col], fill_value=0)
            adjusted_step_cols.append(adjusted_col)
        else:
            adjusted_step_cols.append(step_col)

    if not adjusted_step_cols:
        print(f"警告:文件夹 {folder} 无有效step列,跳过处理")
        return None, None, None

    # Long format so several step columns can be averaged together per step.
    df_step = df[['step'] + adjusted_step_cols].melt(
        id_vars='step', var_name='metric', value_name='time')
    df_step = df_step.dropna(subset=['time'])
    df_step_avg = df_step.groupby('step')['time'].mean().reset_index()

    # Generation time per step, when recorded.
    gen_col = 'timing_s/gen'
    df_gen = df[[gen_col, 'step']] if gen_col in df.columns else None
    if df_gen is not None:
        df_gen = df_gen.rename(columns={gen_col: 'time'}).dropna()
        df_gen_avg = df_gen.groupby('step')['time'].mean().reset_index()
    else:
        df_gen_avg = None

    # Mean response length per step, when recorded.
    response_col = 'response_length/mean'
    df_response = df[[response_col, 'step']] if response_col in df.columns else None
    if df_response is not None:
        df_response = df_response.rename(columns={response_col: 'length'}).dropna()
        df_response_avg = df_response.groupby('step')['length'].mean().reset_index()
    else:
        df_response_avg = None

    return df_step_avg, df_gen_avg, df_response_avg
def plot_combined_comparison(folders, labels, save_name, title):
    """Plot step time, generation time and generation time per token.

    Produces a single figure with three stacked subplots (shared x axis)
    comparing all runs, and prints baseline/adaptive speed-up ratios split
    at step 150.

    Args:
        folders: result folders, each containing a ``stats.csv``.
        labels: display label per folder, same order as *folders*. The
            first run is treated as "Adaptive" and the second as
            "Baseline" when computing speed-up ratios.
        save_name: file name for the figure under ``results/figures``.
        title: overall figure title.
    """
    all_data = []
    for folder, label in zip(folders, labels):
        step_avg, gen_avg, response_avg = process_folder(folder)
        if step_avg is None or step_avg.empty:
            print(f"警告:{label} 无有效step数据,跳过")
            continue
        all_data.append((label, step_avg, gen_avg, response_avg))

    # NOTE(review): 128 * 16 looks like rollout batch size x samples per
    # prompt, converting mean response length to total tokens per step —
    # confirm against the training configuration.
    tokens_per_step = 128 * 16

    # Speed-up ratios assume exactly two runs: [adaptive, baseline].
    _, adapt_step, adapt_gen, adapt_response = all_data[0]
    _, base_step, base_gen, base_response = all_data[1]

    def _split_150(frame):
        # Partition a per-step dataframe into (step <= 150, step > 150).
        return frame[frame['step'] <= 150], frame[frame['step'] > 150]

    # Speed-up = baseline mean / adaptive mean; > 1 means adaptive is faster.
    speedup = {'step': {}, 'gen': {}, 'gen_per_token': {}}

    if not adapt_step.empty and not base_step.empty:
        a_before, a_after = _split_150(adapt_step)
        b_before, b_after = _split_150(base_step)
        if not a_before.empty and not b_before.empty:
            speedup['step']['before_150'] = b_before['time'].mean() / a_before['time'].mean()
        if not a_after.empty and not b_after.empty:
            speedup['step']['after_150'] = b_after['time'].mean() / a_after['time'].mean()

    if (adapt_gen is not None and not adapt_gen.empty and
            base_gen is not None and not base_gen.empty):
        a_before, a_after = _split_150(adapt_gen)
        b_before, b_after = _split_150(base_gen)
        if not a_before.empty and not b_before.empty:
            speedup['gen']['before_150'] = b_before['time'].mean() / a_before['time'].mean()
        if not a_after.empty and not b_after.empty:
            speedup['gen']['after_150'] = b_after['time'].mean() / a_after['time'].mean()

    if (adapt_gen is not None and not adapt_gen.empty and
            base_gen is not None and not base_gen.empty and
            adapt_response is not None and not adapt_response.empty and
            base_response is not None and not base_response.empty):
        adapt_merged = pd.merge(adapt_gen, adapt_response, on='step', how='inner')
        base_merged = pd.merge(base_gen, base_response, on='step', how='inner')
        a_before, a_after = _split_150(adapt_merged)
        b_before, b_after = _split_150(base_merged)
        if not a_before.empty and not b_before.empty:
            a_ratio = (a_before['time'] / a_before['length'] / tokens_per_step).mean()
            b_ratio = (b_before['time'] / b_before['length'] / tokens_per_step).mean()
            speedup['gen_per_token']['before_150'] = b_ratio / a_ratio
        if not a_after.empty and not b_after.empty:
            a_ratio = (a_after['time'] / a_after['length'] / tokens_per_step).mean()
            b_ratio = (b_after['time'] / b_after['length'] / tokens_per_step).mean()
            speedup['gen_per_token']['after_150'] = b_ratio / a_ratio

    def _fmt(value):
        # Missing ratios are the string 'N/A'; applying ':.2f' to a str
        # raises ValueError, so only format real numbers.
        return f"{value:.2f}" if isinstance(value, (int, float)) else str(value)

    print("===== 加速比统计 =====")
    print("Step时间加速比:")
    print(f"150轮之前 (Baseline / Adaptive): {_fmt(speedup['step'].get('before_150', 'N/A'))}")
    print(f"150轮之后 (Baseline / Adaptive): {_fmt(speedup['step'].get('after_150', 'N/A'))}")
    print("\nGen时间加速比:")
    print(f"150轮之前 (Baseline / Adaptive): {_fmt(speedup['gen'].get('before_150', 'N/A'))}")
    print(f"150轮之后 (Baseline / Adaptive): {_fmt(speedup['gen'].get('after_150', 'N/A'))}")
    print("\nGen per Token加速比:")
    print(f"150轮之前 (Baseline / Adaptive): {_fmt(speedup['gen_per_token'].get('before_150', 'N/A'))}")
    print(f"150轮之后 (Baseline / Adaptive): {_fmt(speedup['gen_per_token'].get('after_150', 'N/A'))}")
    print("======================\n")

    # Three stacked subplots sharing the step axis.
    fig, axes = plt.subplots(3, 1, figsize=(12, 10), sharex=True)
    plt.subplots_adjust(hspace=0.2, top=0.9)
    color_map = plt.colormaps.get_cmap('tab10')
    colors = [color_map(i) for i in range(len(all_data))]

    # Subplot 1: RL training time per step (testing time already removed).
    ax_step = axes[0]
    ax_step.set_title('RL Training Time per step', fontsize=12)
    ax_step.set_ylabel('Time (seconds)', fontsize=10)
    for idx, (label, step_avg, _, _) in enumerate(all_data):
        ax_step.plot(step_avg['step'], step_avg['time'],
                     linestyle='-', linewidth=2, color=colors[idx], label=label)
    ax_step.legend(loc='upper right', fontsize=10)
    ax_step.grid(True, linestyle='--', alpha=0.7)

    # Subplot 2: generation time.
    ax_gen = axes[1]
    ax_gen.set_title('Generation Time', fontsize=12)
    ax_gen.set_ylabel('Time (seconds)', fontsize=10)
    for idx, (label, _, gen_avg, _) in enumerate(all_data):
        if gen_avg is not None and not gen_avg.empty:
            ax_gen.plot(gen_avg['step'], gen_avg['time'],
                        linestyle='-', linewidth=2, color=colors[idx], label=label)
    ax_gen.legend(loc='upper right', fontsize=10)
    ax_gen.grid(True, linestyle='--', alpha=0.7)

    # Subplot 3: generation time per used token.
    ax_ratio = axes[2]
    ax_ratio.set_title('Generation Time per Used Token', fontsize=12)
    ax_ratio.set_xlabel('Step', fontsize=10)
    ax_ratio.set_ylabel('Time per Token (seconds)', fontsize=10)
    for idx, (label, _, gen_avg, response_avg) in enumerate(all_data):
        if (gen_avg is not None and response_avg is not None
                and not gen_avg.empty and not response_avg.empty):
            df_merged = pd.merge(gen_avg, response_avg, on='step', how='inner')
            df_merged['gen_time_per_token'] = (
                df_merged['time'] / df_merged['length'] / tokens_per_step)
            ax_ratio.plot(df_merged['step'], df_merged['gen_time_per_token'],
                          linestyle='-', linewidth=2, color=colors[idx], label=label)
    ax_ratio.legend(loc='upper right', fontsize=10)
    ax_ratio.grid(True, linestyle='--', alpha=0.7)

    fig.suptitle(title, fontsize=14, y=0.95)
    save_dir = 'results/figures'
    os.makedirs(save_dir, exist_ok=True)
    plt.savefig(os.path.join(save_dir, save_name), bbox_inches='tight')
    plt.close()
if __name__ == '__main__':
    # Paired runs: accelerated (adaptive) vs. non-accelerated (baseline);
    # order must match the labels list below.
    folders = [
        'results/codev_3.1k_dapo_accelerate',
        'results/codev_3.1k_dapo_no_accelerate'
    ]
    labels = ['Adaptive DAPO', 'Baseline DAPO']
    plot_combined_comparison(
        folders=folders,
        labels=labels,
        save_name='adaptive_dapo_timing_comparison.png',
        title='Acceleration Result of Adaptive DAPO'
    )
......@@ -118,7 +118,7 @@ python3 -m verl.trainer.main_ppo \
trainer.critic_warmup=0 \
trainer.logger=['console','wandb'] \
trainer.project_name='codev' \
trainer.experiment_name='codev-7b-3.1kdata' \
trainer.experiment_name='codev-7b-14kdata' \
trainer.n_gpus_per_node=$USER_GPUS_PER_NODE \
trainer.nnodes=$SLURM_JOB_NUM_NODES \
+trainer.val_before_train=False \
......
......@@ -118,7 +118,7 @@ python3 -m verl.trainer.main_ppo \
trainer.critic_warmup=0 \
trainer.logger=['console','wandb'] \
trainer.project_name='codev' \
trainer.experiment_name='codev-7b-3.1kdata' \
trainer.experiment_name='codev-7b-16kdata' \
trainer.n_gpus_per_node=$USER_GPUS_PER_NODE \
trainer.nnodes=$SLURM_JOB_NUM_NODES \
+trainer.val_before_train=False \
......
......@@ -125,7 +125,7 @@ python3 -m verl.trainer.main_ppo \
trainer.default_local_dir=$SAVE_DIR \
trainer.resume_mode=auto \
trainer.default_hdfs_dir=null \
trainer.save_freq=40 \
trainer.save_freq=20 \
trainer.test_freq=20 \
trainer.total_epochs=100 "${@:1}"
......
......@@ -126,7 +126,7 @@ python3 -m verl.trainer.main_ppo \
trainer.default_local_dir=$SAVE_DIR \
trainer.resume_mode=auto \
trainer.default_hdfs_dir=null \
trainer.save_freq=40 \
trainer.save_freq=20 \
trainer.test_freq=20 \
trainer.total_epochs=100 "${@:1}"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment