songxinkai / muzero · Commits

Commit a5724559 authored Jan 11, 2020 by Werner Duvaud
Fix comments
parent c5f6bffc
Showing 3 changed files with 5 additions and 5 deletions:

games/cartpole.py     +2 -2
games/lunarlander.py  +2 -2
models.py             +1 -1
games/cartpole.py (View file @ a5724559)

...
@@ -9,7 +9,7 @@ class MuZeroConfig:
         ### Game
         self.observation_shape = 4  # Dimensions of the game observation
-        self.action_space = [i for i in range(2)]  # Fixed list of all possible actions (float between 0 and 1)
+        self.action_space = [i for i in range(2)]  # Fixed list of all possible actions

         ### Self-Play
...

@@ -41,7 +41,7 @@ class MuZeroConfig:
         self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
         self.window_size = 1000  # Number of self-play games to keep in the replay buffer
         self.td_steps = 10  # Number of steps in the future to take into account for calculating the target value
-        self.training_delay = 0  # Number of seconds to wait after each played game to adjust the self play / training ratio to avoid over/underfitting
+        self.training_delay = 0  # Number of seconds to wait after each training to adjust the self play / training ratio to avoid over/underfitting
         self.weight_decay = 1e-4  # L2 weights regularization
         self.momentum = 0.9

...
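The training_delay fix above clarifies which side of the loop the delay throttles: the trainer waits after each training step, not after each played game. A minimal sketch of how such a knob could be consumed, assuming a config with training_steps and training_delay attributes and a replay buffer exposing sample_batch (the train_step callable is a hypothetical stand-in, not a function from this repository):

import time

def train(config, replay_buffer, train_step):
    # Hypothetical trainer loop: training_delay throttles the trainer after
    # each training step so self-play can keep pace and refill the buffer,
    # balancing the self-play / training ratio against over/underfitting.
    for step in range(config.training_steps):
        batch = replay_buffer.sample_batch()
        train_step(batch)
        time.sleep(config.training_delay)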
games/lunarlander.py (View file @ a5724559)

...
@@ -10,7 +10,7 @@ class MuZeroConfig:
         ### Game
         self.observation_shape = 8  # Dimensions of the game observation
-        self.action_space = [i for i in range(4)]  # Fixed list of all possible actions (float between 0 and 1)
+        self.action_space = [i for i in range(4)]  # Fixed list of all possible actions

         ### Self-Play
...

@@ -42,7 +42,7 @@ class MuZeroConfig:
         self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
         self.window_size = 1000  # Number of self-play games to keep in the replay buffer
         self.td_steps = 10  # Number of steps in the future to take into account for calculating the target value
-        self.training_delay = 0  # Number of seconds to wait after each played game to adjust the self play / training ratio to avoid over/underfitting
+        self.training_delay = 0  # Number of seconds to wait after each training to adjust the self play / training ratio to avoid over/underfitting
         self.weight_decay = 1e-4  # L2 weights regularization
         self.momentum = 0.9

...
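For context on the td_steps line above: it sets how many future steps feed the value target, i.e. an n-step return that sums discounted rewards for td_steps moves and then bootstraps from the stored search value. A small illustrative helper under that reading; the function name, argument layout, and indexing convention are mine, not this file's:

def n_step_value_target(rewards, root_values, t, td_steps, discount):
    # Discounted rewards over the next td_steps moves, bootstrapped from
    # the stored search value td_steps into the future (0 past game end).
    bootstrap = t + td_steps
    value = root_values[bootstrap] * discount ** td_steps if bootstrap < len(root_values) else 0.0
    for i, reward in enumerate(rewards[t:bootstrap]):
        value += reward * discount ** i
    return value

# With td_steps=2: target(t) = r_t + discount * r_{t+1} + discount**2 * v_{t+2}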
models.py (View file @ a5724559)

...
@@ -64,7 +64,7 @@ class MuZeroNetwork(torch.nn.Module):
         return self.representation_network(observation)

     def dynamics(self, encoded_state, action):
-        # Stack encoded_state with one hot action (See paper appendix
-        # Network Architecture)
+        # Stack encoded_state with one hot action (See paper appendix Network Architecture)
         action_one_hot = (torch.zeros((action.shape[0], self.action_space_size)).to(action.device)

...
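The comment joined above describes what dynamics() feeds the dynamics network: the encoded state concatenated with a one-hot encoding of the action, per the paper's Network Architecture appendix. A self-contained sketch of that stacking for the fully connected case; since the rest of the expression is truncated in the diff, the scatter_ fill and concatenation here follow common PyTorch idiom rather than this file's exact continuation:

import torch

def stack_action(encoded_state, action, action_space_size):
    # One-hot encode the action batch: shape (batch, action_space_size).
    action_one_hot = torch.zeros((action.shape[0], action_space_size)).to(action.device)
    action_one_hot.scatter_(1, action.long(), 1.0)
    # Stack the encoded state with the one-hot action along the feature axis.
    return torch.cat((encoded_state, action_one_hot), dim=1)

# Example: batch of 2 encoded states of width 3, actions 0 and 1.
x = stack_action(torch.randn(2, 3), torch.tensor([[0], [1]]), action_space_size=2)
print(x.shape)  # torch.Size([2, 5])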