Commit c9059c39 by Werner Duvaud

Fix cpu mode

parent a5724559
......@@ -11,12 +11,7 @@ It is designed to be easily adaptable for every games or reinforcement learning
MuZero is a model-based reinforcement learning algorithm and the successor of AlphaZero. It learns to master games without knowing the rules: it only knows the available actions, and from those it learns to play and master the game. It is at least as efficient as similar algorithms like [AlphaZero](https://arxiv.org/abs/1712.01815), [SimPLe](https://arxiv.org/abs/1903.00374) and [World Models](https://arxiv.org/abs/1803.10122).
It uses [PyTorch](https://github.com/pytorch/pytorch) and [Ray](https://github.com/ray-project/ray) to run the different components simultaneously. There is full GPU support.
There are four components, each implemented as a class that runs in its own dedicated thread.
The `shared storage` holds the latest neural network weights, and `self-play` uses those weights to generate self-play games and store them in the `replay buffer`. Finally, those played games are used to `train` the network, and the updated weights are stored back in the shared storage, closing the loop. See [How it works](https://github.com/werner-duvaud/muzero-general/wiki/How-MuZero-works)
These components are launched and managed from the MuZero class in `muzero.py`, and the structure of the neural network is defined in `models.py`.
It uses [PyTorch](https://github.com/pytorch/pytorch) and [Ray](https://github.com/ray-project/ray) to run the different components simultaneously. GPU training is supported. See [How it works](https://github.com/werner-duvaud/muzero-general/wiki/How-MuZero-works)
All performance metrics are tracked and displayed in real time in TensorBoard.
......
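For readers new to the codebase, the following toy sketch illustrates the cycle described above with a handful of Ray actors and tasks. The names here (Storage, Buffer, self_play_step, train_step) are hypothetical stand-ins, not the repository's real classes (those live in shared_storage.py, replay_buffer.py, self_play.py and trainer.py); the sketch only shows how weights and games circulate between the components.

import ray

ray.init()

@ray.remote
class Storage:
    """Holds the latest network weights (here just an integer)."""
    def __init__(self, weights):
        self.weights = weights
    def get_weights(self):
        return self.weights
    def set_weights(self, weights):
        self.weights = weights

@ray.remote
class Buffer:
    """Stores played games for the trainer to sample from."""
    def __init__(self):
        self.games = []
    def save_game(self, game):
        self.games.append(game)
    def sample(self):
        return self.games[-1] if self.games else None

@ray.remote
def self_play_step(storage, buffer):
    weights = ray.get(storage.get_weights.remote())  # fetch the latest weights
    game = "game played with weights %s" % weights   # pretend to play one game
    ray.get(buffer.save_game.remote(game))           # push it to the replay buffer

@ray.remote
def train_step(storage, buffer, step):
    batch = ray.get(buffer.sample.remote())          # sample from the replay buffer
    if batch is not None:
        ray.get(storage.set_weights.remote(step))    # pretend training produced new weights

storage, buffer = Storage.remote(0), Buffer.remote()
for step in range(1, 4):
    ray.get(self_play_step.remote(storage, buffer))
    ray.get(train_step.remote(storage, buffer, step))
print(ray.get(storage.get_weights.remote()))         # prints 3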
import gym
import numpy
import torch
class MuZeroConfig:
......@@ -42,6 +43,7 @@ class MuZeroConfig:
self.window_size = 1000 # Number of self-play games to keep in the replay buffer
self.td_steps = 10 # Number of steps in the future to take into account when calculating the target value
self.training_delay = 0 # Number of seconds to wait after each training to adjust the self play / training ratio to avoid over/underfitting
self.training_device = "cuda" if torch.cuda.is_available() else "cpu" # Train on GPU if available
self.weight_decay = 1e-4 # L2 weights regularization
self.momentum = 0.9
......@@ -55,6 +57,7 @@ class MuZeroConfig:
### Test
self.test_episodes = 2 # Number of games played to evaluate the network
def visit_softmax_temperature_fn(self, trained_steps):
"""
Parameter to alter the visit count distribution to ensure that the action selection becomes greedier as training progresses.
......
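As context for the docstring above, a typical schedule lowers the temperature in steps as training progresses so that action selection becomes greedier. The thresholds below follow the MuZero paper's example and are an assumption, not necessarily the values used in this repository:

def visit_softmax_temperature_fn(self, trained_steps):
    # Illustrative schedule only (assumed thresholds): a lower temperature
    # makes action selection greedier as training progresses.
    if trained_steps < 0.5 * self.training_steps:
        return 1.0
    elif trained_steps < 0.75 * self.training_steps:
        return 0.5
    else:
        return 0.25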
......@@ -36,20 +36,21 @@ class MuZeroConfig:
### Training
self.results_path = "./pretrained" # Path to store the model weights
self.training_steps = 10000 # Total number of training steps (i.e. weight updates, one per batch)
self.training_steps = 20000 # Total number of training steps (i.e. weight updates, one per batch)
self.batch_size = 128 # Number of parts of games to train on at each training step
self.num_unroll_steps = 5 # Number of game moves to keep for every batch element
self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing
self.window_size = 1000 # Number of self-play games to keep in the replay buffer
self.td_steps = 10 # Number of steps in the future to take into account when calculating the target value
self.training_delay = 0 # Number of seconds to wait after each training to adjust the self play / training ratio to avoid over/underfitting
self.training_device = "cuda" if torch.cuda.is_available() else "cpu" # Train on GPU if available
self.weight_decay = 1e-4 # L2 weights regularization
self.momentum = 0.9
# Exponential learning rate schedule
self.lr_init = 0.01 # Initial learning rate
self.lr_decay_rate = 0.005
self.lr_decay_rate = 0.001
self.lr_decay_steps = 10000
......
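The three lr_* parameters above define an exponential decay. A common way to turn them into a per-step learning rate (this formula is the usual MuZero-style schedule and is an assumption about how the repository consumes these values) is:

def exponential_lr(training_step, lr_init=0.01, lr_decay_rate=0.001, lr_decay_steps=10000):
    # Assumed schedule: after lr_decay_steps steps the learning rate has decayed
    # by a factor of lr_decay_rate (0.01 -> 1e-5 with the defaults shown here).
    return lr_init * lr_decay_rate ** (training_step / lr_decay_steps)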
......@@ -44,6 +44,11 @@ class MuZero:
)
)
raise err
try:
os.mkdir(os.path.join(self.config.results_path))
except FileExistsError:
pass
# Fix random generator seed for reproducibility
numpy.random.seed(self.config.seed)
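The try/except around os.mkdir above only ensures the results directory exists before training starts; an equivalent one-liner (shown purely as an alternative, not the commit's code) is:

import os

os.makedirs("./pretrained", exist_ok=True)  # same effect: create results_path, ignore it if it already exists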
......@@ -65,10 +70,7 @@ class MuZero:
# Initialize workers
training_worker = trainer.Trainer.remote(
copy.deepcopy(self.muzero_weights),
self.config,
# Train on GPU if available
"cuda" if torch.cuda.is_available() else "cpu",
copy.deepcopy(self.muzero_weights), self.config
)
shared_storage_worker = shared_storage.SharedStorage.remote(
copy.deepcopy(self.muzero_weights), self.game_name, self.config,
......@@ -79,12 +81,11 @@ class MuZero:
copy.deepcopy(self.muzero_weights),
self.Game(self.config.seed + seed),
self.config,
"cpu",
)
for seed in range(self.config.num_actors)
]
test_worker = self_play.SelfPlay.remote(
copy.deepcopy(self.muzero_weights), self.Game(), self.config, "cpu",
copy.deepcopy(self.muzero_weights), self.Game(), self.config
)
# Launch workers
......@@ -145,7 +146,7 @@ class MuZero:
print("Testing...")
ray.init()
self_play_workers = self_play.SelfPlay.remote(
copy.deepcopy(self.muzero_weights), self.Game(), self.config, "cpu",
copy.deepcopy(self.muzero_weights), self.Game(), self.config
)
test_rewards = []
with torch.no_grad():
......@@ -169,5 +170,5 @@ if __name__ == "__main__":
muzero = MuZero("cartpole")
muzero.train()
# muzero.load_model()
#muzero.load_model()
muzero.test()
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "muzero.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Google colab stuffs\n",
"!pip install -r requirements.txt\n",
"!pip uninstall -y pyarrow\n",
"%load_ext tensorboard\n",
"# If you have an import issue with ray in google colab, restart the environment (execution menu)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# You must have the repository imported along with your notebook. \n",
"# For google colab, click on \">\" buttton (left) and import files (muzero.py, self_play.py, ...).\n",
"\n",
"import muzero as mz"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Train on cartpole game\n",
"muzero = mz.MuZero(\"cartpole\")\n",
"muzero.train()\n",
"muzero.test()"
]
}
]
}
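Since the first notebook cell loads the TensorBoard extension without launching it, a follow-up cell along these lines can show the live training curves; the log directory is an assumption and should point at wherever the repository writes its TensorBoard event files:

# Display training metrics in the notebook (the log directory is an assumed path).
%tensorboard --logdir ./results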
......@@ -14,7 +14,7 @@ class SelfPlay:
Class which runs in a dedicated thread to play games and save them to the replay buffer.
"""
def __init__(self, initial_weights, game, config, device):
def __init__(self, initial_weights, game, config):
self.config = config
self.game = game
......@@ -26,7 +26,7 @@ class SelfPlay:
self.config.hidden_size,
)
self.model.set_weights(initial_weights)
self.model.to(torch.device(device))
self.model.to(torch.device('cpu'))
self.model.eval()
def continuous_self_play(self, shared_storage, replay_buffer, test_mode=False):
......@@ -272,6 +272,7 @@ class GameHistory:
def store_search_statistics(self, root, action_space):
sum_visits = sum(child.visit_count for child in root.children.values())
# TODO: action could be of any type, not only integers
self.child_visits.append(
[
root.children[a].visit_count / sum_visits if a in root.children else 0
......
......@@ -7,14 +7,14 @@ import torch
import models
@ray.remote(num_gpus=1)
@ray.remote(num_gpus=1 if torch.cuda.is_available() else 0)
class Trainer:
"""
Class which runs in a dedicated thread to train a neural network and save it
in the shared storage.
"""
def __init__(self, initial_weights, config, device):
def __init__(self, initial_weights, config):
self.config = config
self.training_step = 0
......@@ -26,7 +26,7 @@ class Trainer:
self.config.hidden_size,
)
self.model.set_weights(initial_weights)
self.model.to(torch.device(device))
self.model.to(torch.device(config.training_device))
self.model.train()
self.optimizer = torch.optim.SGD(
......
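The conditional @ray.remote(num_gpus=...) decorator above is the heart of the CPU fix: on a machine without CUDA the trainer actor requests zero GPUs, so Ray can still schedule it, and the model is then placed on config.training_device ("cpu" in that case). A standalone illustration of the same pattern, with hypothetical names, is:

import ray
import torch

@ray.remote(num_gpus=1 if torch.cuda.is_available() else 0)
class DeviceAwareWorker:
    # Hypothetical actor showing the conditional-resource pattern used by Trainer.
    def __init__(self, device):
        self.tensor = torch.zeros(3).to(torch.device(device))

    def where(self):
        return str(self.tensor.device)

ray.init(ignore_reinit_error=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
worker = DeviceAwareWorker.remote(device)
print(ray.get(worker.where.remote()))  # "cpu" on CPU-only machines, "cuda:0" otherwise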