Commit a5724559 by Werner Duvaud

Fix comments

parent c5f6bffc
@@ -9,7 +9,7 @@ class MuZeroConfig:
         ### Game
         self.observation_shape = 4  # Dimensions of the game observation
-        self.action_space = [i for i in range(2)]  # Fixed list of all possible actions (float between 0 and 1)
+        self.action_space = [i for i in range(2)]  # Fixed list of all possible actions

         ### Self-Play
@@ -41,7 +41,7 @@ class MuZeroConfig:
         self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
         self.window_size = 1000  # Number of self-play games to keep in the replay buffer
         self.td_steps = 10  # Number of steps in the future to take into account for calculating the target value
-        self.training_delay = 0  # Number of seconds to wait after each played game to adjust the self play / training ratio to avoid over/underfitting
+        self.training_delay = 0  # Number of seconds to wait after each training to adjust the self play / training ratio to avoid over/underfitting
         self.weight_decay = 1e-4  # L2 weights regularization
         self.momentum = 0.9
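For context on the reworded comment, here is a minimal sketch of how a training_delay setting like this can throttle the learner against self-play; the Config class, train function, and stub training step are illustrative only, not the repo's actual training loop.

import time

class Config:
    training_steps = 100
    training_delay = 0  # seconds to wait after each training step

def train(config, train_step):
    # Sleeping after each training step (not after each played game,
    # as the old comment said) slows the learner relative to the
    # self-play actors, adjusting the self-play / training ratio
    # to avoid over/underfitting.
    for step in range(config.training_steps):
        train_step(step)                   # one optimizer update
        time.sleep(config.training_delay)  # 0 disables the delay

train(Config(), lambda step: None)  # stub training step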
@@ -10,7 +10,7 @@ class MuZeroConfig:
         ### Game
         self.observation_shape = 8  # Dimensions of the game observation
-        self.action_space = [i for i in range(4)]  # Fixed list of all possible actions (float between 0 and 1)
+        self.action_space = [i for i in range(4)]  # Fixed list of all possible actions

         ### Self-Play
@@ -42,7 +42,7 @@ class MuZeroConfig:
         self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
         self.window_size = 1000  # Number of self-play games to keep in the replay buffer
         self.td_steps = 10  # Number of steps in the future to take into account for calculating the target value
-        self.training_delay = 0  # Number of seconds to wait after each played game to adjust the self play / training ratio to avoid over/underfitting
+        self.training_delay = 0  # Number of seconds to wait after each training to adjust the self play / training ratio to avoid over/underfitting
         self.weight_decay = 1e-4  # L2 weights regularization
         self.momentum = 0.9
@@ -64,7 +64,7 @@ class MuZeroNetwork(torch.nn.Module):
         return self.representation_network(observation)

     def dynamics(self, encoded_state, action):
         # Stack encoded_state with one hot action (See paper appendix Network Architecture)
         action_one_hot = (
             torch.zeros((action.shape[0], self.action_space_size))
             .to(action.device)
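The hunk above is cut off by the diff view. For context, a minimal sketch of the one-hot stacking pattern it begins, under assumed tensor shapes; dynamics_input and the example tensors are illustrative, not the repo's code.

import torch

def dynamics_input(encoded_state, action, action_space_size):
    # Build a (batch, action_space_size) one-hot encoding of the
    # action, then stack it with the encoded state along the feature
    # axis (see the MuZero paper appendix, Network Architecture).
    action_one_hot = (
        torch.zeros((action.shape[0], action_space_size))
        .to(action.device)
        .scatter_(1, action.long(), 1.0)
    )
    return torch.cat((encoded_state, action_one_hot), dim=1)

# Example: batch of 3 encoded states of width 8, actions in {0, 1}
encoded_state = torch.randn(3, 8)
action = torch.tensor([[0], [1], [1]])
print(dynamics_input(encoded_state, action, 2).shape)  # torch.Size([3, 10])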