Commit 65ac1294 by Yaoyu Zhu

fix bugs in dapo config (no dynamic sampling, no token-level loss)

parent d58782a4
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 1))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=1024 \
data.max_response_length=3072 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 1))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=1024 \
data.max_response_length=3072 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -69,6 +69,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=16384 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
......@@ -18,12 +18,12 @@ overlong_buffer_len=$((1024 * 4))
overlong_penalty_factor=1.0
# An early version for DAPO
-enable_filter_groups=False
+enable_filter_groups=True
gen_prompt_bsz=512 # NOTE: no filtering here
train_prompt_bsz=512
train_prompt_mini_bsz=32
n_resp_per_prompt=16
-use_token_level_loss=False
+use_token_level_loss=True
# Ray
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
......@@ -77,6 +77,7 @@ python3 -m verl.trainer.main_ppo \
data.val_batch_size=512 \
data.max_prompt_length=2048 \
data.max_response_length=8192 \
+algorithm.filter_groups.enable=${enable_filter_groups} \
actor_rollout_ref.model.path=$MODEL_PATH \
+actor_rollout_ref.model.override_config.attention_dropout=0. \
+actor_rollout_ref.model.override_config.embd_pdrop=0. \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment