Commit afe1b27d by Yaoyu Zhu

change plotting

parent 445e487c
......@@ -9,7 +9,9 @@ import matplotlib.pyplot as plt
def process_log(folder):
folder = Path(folder)
# 获取文件夹内所有的 .out 文件
log_files = list(folder.glob("*.out"))
log_files = []
for suffix in ["out", "log"]:
log_files.extend(list(folder.glob(f"*.{suffix}")))
key_value_pattern = re.compile(r"([\w/%(),[\]]+):\s*(-?\d+\.?\d*)")
all_data = {} # 用于存储每个 step 对应的最新数据
......
import pandas as pd
from pathlib import Path
import argparse
def calculate_timing(folder):
    """Report per-column averages of the 'timing_s*' fields in a run's stats.csv.

    Reads ``<folder>/stats.csv``, selects every column whose name starts with
    ``'timing_s'``, and prints the mean of the first 10 rows of each such
    column. Prints a notice instead when no matching column exists.

    Args:
        folder: Path (str or Path) to the directory containing ``stats.csv``.

    Returns:
        None — output goes to stdout only.

    Raises:
        FileNotFoundError: if ``stats.csv`` does not exist in ``folder``.
    """
    stats = pd.read_csv(Path(folder) / 'stats.csv')
    # Columns produced by the trainer's timing instrumentation.
    timing_cols = [name for name in stats.columns if name.startswith('timing_s')]
    if not timing_cols:
        print("未找到以 'timing_s' 开头的字段。")
        return
    # Average only the first 10 rows (warm-up window of the run).
    print("以 'timing_s' 开头的字段前 10 行的平均值:")
    print(stats[timing_cols].head(10).mean())
if __name__ == '__main__':
    # CLI entry point: report timing statistics for one run directory.
    cli = argparse.ArgumentParser(description='Process log files and generate CSV.')
    cli.add_argument('--folder', type=str,
                     default='ckpt/codev_distill_16k',
                     help='Path to the folder containing the log file.')
    calculate_timing(cli.parse_args().folder)
\ No newline at end of file
......@@ -71,7 +71,7 @@ python3 -m verl.trainer.main_ppo \
algorithm.filter_groups.enable=${enable_filter_groups} \
algorithm.filter_groups.max_num_gen_batches=999 \
algorithm.filter_groups.metric=acc \
data.gen_batch_size=$((($train_prompt_bsz * 4 / 3 + $num_gpu - 1) / $num_gpu * $num_gpu)) \
data.gen_batch_size=$((($train_prompt_bsz * 3 / 2 + $num_gpu - 1) / $num_gpu * $num_gpu)) \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
#!/bin/bash
# Launch a GRPO training run (verl PPO trainer) for the CodeV distill model.
set -x
# Warning: Export VLLM_ATTENTION_BACKEND on every machine before starting Ray cluster.
# vLLM without XFORMERS will result in CUDA errors.
# export VLLM_ATTENTION_BACKEND=XFORMERS
# Parse command line arguments
# Only --model is recognized; the first unrecognized argument stops parsing,
# and everything remaining is forwarded verbatim to the trainer via "${@:1}".
while [[ $# -gt 0 ]]; do
case $1 in
--model)
MODEL_PATH="$2"
shift 2
;;
*)
break
;;
esac
done
# Set default model path if not provided
if [ -z "$MODEL_PATH" ]; then
MODEL_PATH="/nfs_global/S/lvhanqi/LLaMA-Factory/saves/Qwen2.5-Coder-7B-Instruct-codev-r1-87k/full/sft_6epoch"
fi
echo "GLOO_SOCKET_IFNAME is $GLOO_SOCKET_IFNAME!"
# Per-GPU token budget for dynamic batching; also sizes the rollout
# log-prob pass (doubled below). GPU_MEMORY_UTILIZATION is vLLM's fraction.
MAX_TOKEN_PER_GPU=32768
GPU_MEMORY_UTILIZATION=0.8
# NOTE(review): the whole command below is one backslash-continued line, so no
# comments can be inserted inside it. Also, 'use_dynamic_bsz=True\' has no
# space before its backslash — confirm the continuation still separates the
# next argument (leading whitespace on the next line would do so).
python3 -m verl.trainer.main_ppo \
algorithm.adv_estimator=grpo \
data.train_files=$CURR_DIR/data/codev/v1/4.8k_r1_filtered/train.parquet \
data.val_files=$CURR_DIR/data/codev/v1_1/10k_qwq/test.parquet \
data.train_batch_size=128 \
data.val_batch_size=512 \
data.max_prompt_length=1152 \
data.max_response_length=16384 \
actor_rollout_ref.model.path=$MODEL_PATH \
actor_rollout_ref.actor.optim.lr=2e-6 \
actor_rollout_ref.actor.optim.weight_decay=0.0 \
+actor_rollout_ref.actor.optim.betas=[0.9,0.999] \
actor_rollout_ref.actor.grad_clip=0.5 \
actor_rollout_ref.actor.use_dynamic_bsz=True\
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=$MAX_TOKEN_PER_GPU \
actor_rollout_ref.model.use_remove_padding=True \
actor_rollout_ref.actor.ppo_mini_batch_size=64 \
actor_rollout_ref.actor.use_kl_loss=True \
actor_rollout_ref.actor.kl_loss_coef=0.000 \
actor_rollout_ref.actor.kl_loss_type=low_var_kl \
actor_rollout_ref.model.enable_gradient_checkpointing=True \
actor_rollout_ref.actor.fsdp_config.param_offload=False \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=$(($MAX_TOKEN_PER_GPU*2)) \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.name=vllm \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.rollout.val_kwargs.n=2 \
actor_rollout_ref.rollout.temperature=1.0 \
actor_rollout_ref.rollout.val_kwargs.temperature=1.0 \
actor_rollout_ref.rollout.val_kwargs.do_sample=True \
actor_rollout_ref.rollout.gpu_memory_utilization=$GPU_MEMORY_UTILIZATION \
actor_rollout_ref.rollout.enforce_eager=False \
actor_rollout_ref.rollout.free_cache_engine=False \
actor_rollout_ref.ref.fsdp_config.param_offload=False \
algorithm.kl_ctrl.kl_coef=0.000 \
reward_model.reward_manager=prime \
trainer.critic_warmup=0 \
trainer.logger=['console','wandb'] \
trainer.project_name='codev' \
trainer.experiment_name='codev-distill-7b-16k-t1-kl0' \
trainer.n_gpus_per_node=8 \
trainer.nnodes=2 \
+trainer.val_before_train=True \
trainer.default_local_dir=$SAVE_DIR \
trainer.default_hdfs_dir=null \
trainer.save_freq=50 \
trainer.test_freq=50 \
trainer.total_epochs=100 "${@:1}"
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment