self.observation_shape = 4  # Dimensions of the game observation
self.action_space = [i for i in range(2)]  # Fixed list of all possible actions
self.action_space = [i for i in range(2)]  # Fixed list of all possible actions (float between 0 and 1)
### Self-Play
self.num_actors = 10  # Number of simultaneous threads self-playing to feed the replay buffer
self.max_moves = 500  # Maximum number of moves if the game is not finished before
self.num_simulations = 50  # Number of future moves self-simulated
self.num_simulations = 80  # Number of future moves self-simulated
self.discount = 0.997  # Chronological discount of the reward
self.self_play_delay = None  # Number of seconds to wait after each played game to adjust the self-play / training ratio and avoid overfitting (recommended ratio: 13:1, see https://arxiv.org/abs/1902.04522 Appendix A)
self.self_play_delay = 0  # Number of seconds to wait after each played game to adjust the self-play / training ratio and avoid over/underfitting
# Root prior exploration noise
self.root_dirichlet_alpha = 0.25
...
...
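# --- Illustrative sketch (not part of this diff): how root_dirichlet_alpha is
# typically used. Following the MuZero paper, Dirichlet noise is mixed into the
# root node's prior with an exploration fraction; the function name, the
# `priors` dict and the exploration_fraction value below are assumptions for
# illustration only.
import numpy

def add_exploration_noise(priors, dirichlet_alpha=0.25, exploration_fraction=0.25):
    # Sample one Dirichlet noise component per legal action and blend it
    # with the network prior so the search keeps exploring at the root.
    actions = list(priors.keys())
    noise = numpy.random.dirichlet([dirichlet_alpha] * len(actions))
    return {
        a: priors[a] * (1 - exploration_fraction) + n * exploration_fraction
        for a, n in zip(actions, noise)
    }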
@@ -25,30 +27,33 @@ class MuZeroConfig:
self.pb_c_base = 19652
self.pb_c_init = 1.25
### Network
self.encoding_size = 64
self.hidden_size = 32
self.encoding_size = 32
self.hidden_size = 64
# Training
### Training
self.results_path = "./pretrained"  # Path to store the model weights
self.training_steps = 2000  # Total number of training steps (i.e. weight updates according to a batch)
self.training_steps = 10000  # Total number of training steps (i.e. weight updates according to a batch)
self.batch_size = 128  # Number of parts of games to train on at each training step
self.num_unroll_steps = 5  # Number of game moves to keep for every batch element
self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
self.window_size = 1000  # Number of self-play games to keep in the replay buffer
self.td_steps = 10  # Number of steps in the future to take into account for calculating the target value
self.training_delay = 1  # Number of seconds to wait after each training step to adjust the self-play / training ratio and avoid overfitting (recommended ratio: 13:1, see https://arxiv.org/abs/1902.04522 Appendix A)
self.training_delay = 0  # Number of seconds to wait after each training step to adjust the self-play / training ratio and avoid over/underfitting
self.weight_decay = 1e-4  # L2 weights regularization
self.momentum = 0.9
# Test
self.test_episodes = 2  # Number of games played to evaluate the network
# Exponential learning rate schedule
self.lr_init = 0.0005  # Initial learning rate
self.lr_decay_rate = 0.1
self.lr_decay_steps = 3500
self.lr_init = 0.008  # Initial learning rate
self.lr_decay_rate = 0.01
self.lr_decay_steps = 10000
### Test
self.test_episodes = 2  # Number of games played to evaluate the network
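# --- Illustrative sketch (not part of this diff): the exponential learning rate
# schedule that lr_init, lr_decay_rate and lr_decay_steps describe, in the style
# of the MuZero pseudocode. The function name and defaults (taken from the new
# values above) are shown for illustration only.
def learning_rate(training_step, lr_init=0.008, lr_decay_rate=0.01, lr_decay_steps=10000):
    # The rate decays from lr_init by a factor of lr_decay_rate over
    # lr_decay_steps training steps, e.g. 0.008 at step 0 and 8e-05 at step 10000.
    return lr_init * lr_decay_rate ** (training_step / lr_decay_steps)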
self.observation_shape = 8  # Dimensions of the game observation
self.action_space = [i for i in range(4)]  # Fixed list of all possible actions
self.action_space = [i for i in range(4)]  # Fixed list of all possible actions (float between 0 and 1)
### Self-Play
self.num_actors = 10  # Number of simultaneous threads self-playing to feed the replay buffer
self.max_moves = 500  # Maximum number of moves if the game is not finished before
self.num_simulations = 50  # Number of future moves self-simulated
self.num_simulations = 80  # Number of future moves self-simulated
self.discount = 0.997  # Chronological discount of the reward
self.self_play_delay = None  # Number of seconds to wait after each played game to adjust the self-play / training ratio and avoid overfitting (recommended ratio: 13:1, see https://arxiv.org/abs/1902.04522 Appendix A)
self.self_play_delay = 0  # Number of seconds to wait after each played game to adjust the self-play / training ratio and avoid over/underfitting
# Root prior exploration noise
self.root_dirichlet_alpha = 0.25
...
...
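# --- Illustrative sketch (not part of this diff): one way self_play_delay and
# training_delay can throttle the self-play / training ratio mentioned in the
# comments above. The loop structure and the play_one_game / train_one_batch /
# replay_buffer names are assumptions for illustration only.
import time

def self_play_loop(config, play_one_game, replay_buffer):
    # Sleep after every finished game so self-play does not outpace training.
    while True:
        replay_buffer.append(play_one_game())
        if config.self_play_delay:
            time.sleep(config.self_play_delay)

def training_loop(config, train_one_batch):
    # Sleep after every weight update so training does not outpace self-play.
    for step in range(config.training_steps):
        train_one_batch()
        if config.training_delay:
            time.sleep(config.training_delay)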
@@ -26,30 +28,34 @@ class MuZeroConfig:
self.pb_c_base = 19652
self.pb_c_init = 1.25
### Network
self.encoding_size = 64
self.hidden_size = 32
self.encoding_size = 32
self.hidden_size = 64
# Training
### Training
self.results_path = "./pretrained"  # Path to store the model weights
self.training_steps = 2000  # Total number of training steps (i.e. weight updates according to a batch)
self.training_steps = 10000  # Total number of training steps (i.e. weight updates according to a batch)
self.batch_size = 128  # Number of parts of games to train on at each training step
self.num_unroll_steps = 5  # Number of game moves to keep for every batch element
self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
self.window_size = 1000  # Number of self-play games to keep in the replay buffer
self.td_steps = 10  # Number of steps in the future to take into account for calculating the target value
self.training_delay = 8  # Number of seconds to wait after each training step to adjust the self-play / training ratio and avoid overfitting (recommended ratio: 13:1, see https://arxiv.org/abs/1902.04522 Appendix A)
self.training_delay = 0  # Number of seconds to wait after each training step to adjust the self-play / training ratio and avoid over/underfitting
self.weight_decay = 1e-4  # L2 weights regularization
self.momentum = 0.9
# Test
# Exponential learning rate schedule
self.lr_init = 0.01  # Initial learning rate
self.lr_decay_rate = 0.005
self.lr_decay_steps = 10000
### Test
self.test_episodes = 2  # Number of games played to evaluate the network
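# --- Illustrative sketch (not part of this diff): the n-step return that td_steps
# and discount define for the value target, in the spirit of the MuZero paper.
# The function signature and the rewards / root_values lists are assumptions for
# illustration only.
def compute_target_value(rewards, root_values, index, td_steps=10, discount=0.997):
    # Bootstrap from the search value td_steps into the future when available...
    bootstrap_index = index + td_steps
    value = root_values[bootstrap_index] * discount ** td_steps if bootstrap_index < len(root_values) else 0.0
    # ...then add the discounted rewards observed before the bootstrap point.
    for i, reward in enumerate(rewards[index:bootstrap_index]):
        value += reward * discount ** i
    return value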