[TEST] Xavier initialization for benchmarks (#54)

* [TEST] Xavier initialization for benchmarks
* remove additional line

[TEST] Xavier initialization for benchmarks (#54)
* [TEST] Xavier initialization for benchmarks
* remove additional line
5541a275 · Tianqi Chen · 9101181b · 5541a275 · 5541a275 · 5541a275
Commit 5541a275 authored Sep 28, 2017 by Tianqi Chen
Hide whitespace changes
Inline Side-by-side

Showing with 129 additions and 12 deletions

nnvm/python/nnvm/testing/init.py
+110 -0

nnvm/python/nnvm/testing/mobilenet.py
+1 -1

nnvm/python/nnvm/testing/utils.py
+14 -10

nnvm/tutorials/imagenet_inference_gpu.py
+4 -1

No files found.
--- a/nnvm/python/nnvm/testing/init.py
+++ b/nnvm/python/nnvm/testing/init.py
+"""Initializer of parameters."""
+import numpy as np
+class Initializer(object):
+    """The base class of an initializer."""
+    def __init__(self, **kwargs):
+        self._kwargs = kwargs
+    def __call__(self, desc, arr):
+        """Initialize an array
+        Parameters
+        ----------
+        desc : str
+            Initialization pattern descriptor.
+        arr : NDArray
+            The array to be initialized.
+        """
+        if desc.endswith('weight'):
+            self._init_weight(desc, arr)
+        elif desc.endswith('bias'):
+            self._init_bias(desc, arr)
+        elif desc.endswith('gamma'):
+            self._init_gamma(desc, arr)
+        elif desc.endswith('beta'):
+            self._init_beta(desc, arr)
+        elif desc.endswith('mean'):
+            self._init_mean(desc, arr)
+        elif desc.endswith('var'):
+            self._init_var(desc, arr)
+        else:
+            self._init_default(desc, arr)
+    def _init_bias(self, _, arr):
+        arr[:] = 0.0
+    def _init_gamma(self, _, arr):
+        arr[:] = 1.0
+    def _init_beta(self, _, arr):
+        arr[:] = 0.0
+    def _init_mean(self, _, arr):
+        arr[:] = 0.0
+    def _init_var(self, _, arr):
+        arr[:] = 1.0
+    def _init_weight(self, name, arr):
+        """Abstract method to Initialize weight."""
+        raise NotImplementedError("Must override it")
+    def _init_default(self, name, _):
+        raise ValueError(
+            'Unknown initialization pattern for %s. ' \
+            'Default initialization is now limited to '\
+            '"weight", "bias", "gamma" (1.0), and "beta" (0.0).' \
+            'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)
+class Xavier(Initializer):
+    """ "Xavier" initialization for weights
+    Parameters
+    ----------
+    rnd_type: str, optional
+        Random generator type. Only ``'uniform'`` is implemented here; any other value (including ``'gaussian'``) raises ``ValueError``.
+    factor_type: str, optional
+        Can be ``'avg'``, ``'in'``, or ``'out'``.
+    magnitude: float, optional
+        Scale of random number.
+    """
+    def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
+        super(Xavier, self).__init__(rnd_type=rnd_type,
+                                     factor_type=factor_type,
+                                     magnitude=magnitude)
+        self.rnd_type = rnd_type
+        self.factor_type = factor_type
+        self.magnitude = float(magnitude)
+    def _init_weight(self, name, arr):
+        shape = arr.shape
+        hw_scale = 1.
+        if len(shape) < 2:
+            raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at'
+                             ' least 2D.'.format(name))
+        if len(shape) > 2:
+            hw_scale = np.prod(shape[2:])
+        fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale
+        factor = 1.
+        if self.factor_type == "avg":
+            factor = (fan_in + fan_out) / 2.0
+        elif self.factor_type == "in":
+            factor = fan_in
+        elif self.factor_type == "out":
+            factor = fan_out
+        else:
+            raise ValueError("Incorrect factor type")
+        # Hack for mobilenet, because there is less connectivity
+        if "depthwise" in name:
+            factor = 3 * 3
+        scale = np.sqrt(self.magnitude / factor)
+        if self.rnd_type == "uniform":
+            arr[:] = np.random.uniform(-scale, scale, size=arr.shape)
+        else:
+            raise ValueError("Unknown random type")
--- a/nnvm/python/nnvm/testing/mobilenet.py
+++ b/nnvm/python/nnvm/testing/mobilenet.py
@@ -30,7 +30,7 @@ def separable_conv_block(data, name, depthwise_channels,
    # depthwise convolution + bn + relu
    conv1 = sym.conv2d(data=data, channels=depthwise_channels,
                       groups=depthwise_channels, kernel_size=kernel_size, strides=strides,
-                       padding=padding, use_bias=False, layout="NCHW", name=name + "_conv1")
+                       padding=padding, use_bias=False, layout="NCHW", name=name + "_depthwise_conv1")
    bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
    act1 = sym.relu(data=bn1, name=name + "_relu1")
    # pointwise convolution + bn + relu

--- a/nnvm/python/nnvm/testing/utils.py
+++ b/nnvm/python/nnvm/testing/utils.py
@@ -5,9 +5,10 @@ import numpy as np
 import tvm
 from ..compiler import graph_util
 from ..import graph
+from . init import Xavier
+def create_workload(net, batch_size, image_shape=(3, 224, 224),
-def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32"):
+                    dtype="float32", initializer=None, seed=0):
    """Helper function to create benchmark workload for input network
    Parameters
@@ -24,6 +25,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
    dtype : str, optional
        The data type
+    initializer : Initializer
+        The initializer used
+    seed : int
+        The seed used in initialization.
    Returns
    -------
    net : nnvm.Symbol
@@ -38,15 +45,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
    g = graph.create(net)
    input_shapes, _ = graph_util.infer_shape(g, data=data_shape)
    shape_dict = dict(zip(g.index.input_names, input_shapes))
+    np.random.seed(seed)
+    initializer = initializer if initializer else Xavier(magnitude=3)
    for k, v in shape_dict.items():
        if k == "data":
            continue
-        # Specially generate non-negative parameters.
+        init_value = np.zeros(v).astype(dtype)
-        if k.endswith("gamma"):
+        initializer(k, init_value)
-            init = np.random.uniform(0.9, 1, size=v)
+        params[k] = tvm.nd.array(init_value, ctx=tvm.cpu(0))
-        elif k.endswith("var"):
-            init = np.random.uniform(0.9, 1, size=v)
-        else:
-            init = np.random.uniform(-0.1, 0.1, size=v)
-        params[k] = tvm.nd.array(init.astype(dtype), ctx=tvm.cpu(0))
    return net, params
--- a/nnvm/tutorials/imagenet_inference_gpu.py
+++ b/nnvm/tutorials/imagenet_inference_gpu.py
@@ -8,6 +8,7 @@ This is an example of using NNVM to compile MobileNet/ResNet model and deploy it
 To begin with, we import nnvm(for compilation) and TVM(for deployment).
 """
 import tvm
+import numpy as np
 from tvm.contrib import nvcc, graph_runtime
 import nnvm.compiler
 import nnvm.testing
@@ -64,6 +65,7 @@ net, params = nnvm.testing.mobilenet.get_workload(
 graph, lib, params = nnvm.compiler.build(
    net, target, shape={"data": data_shape}, params=params)
 ######################################################################
 # Run the Compiled Module
 # -----------------------
@@ -74,10 +76,11 @@ graph, lib, params = nnvm.compiler.build(
 # This example runs on the same machine.
 #
 # Note that the code below no longer depends on NNVM and relies only on TVM's runtime to run (deploy).
+data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
 module = graph_runtime.create(graph, lib, ctx)
 # set input
 module.set_input(**params)
+module.set_input("data", data)
 # run
 module.run()
 # get output