Commit f34e1744 by yuruofeifei Committed by Tianqi Chen

[GRADIENT] Register more gradient operators (#300)

* Add conv2d max_pool backward op

* Added tests

* Fix testing

* Address comments

* Change dot to matmul

* Address comments

* Break down indicator function

* Make greater, less numpy compatible
parent 4bd92a4a
...@@ -28,6 +28,7 @@ This level enables fully connected multi-layer perceptron.
:nosignatures:
nnvm.symbol.dense
nnvm.symbol.matmul
nnvm.symbol.relu
nnvm.symbol.tanh
nnvm.symbol.sigmoid
...@@ -38,6 +39,7 @@ This level enables fully connected multi-layer perceptron.
nnvm.symbol.elemwise_sub
nnvm.symbol.elemwise_mul
nnvm.symbol.elemwise_div
nnvm.symbol.elemwise_sum
nnvm.symbol.full
nnvm.symbol.full_like
nnvm.symbol.ones
...@@ -54,6 +56,8 @@ This level enables fully connected multi-layer perceptron.
nnvm.symbol.softmax
nnvm.symbol.log_softmax
nnvm.symbol.pad
nnvm.symbol.block_grad
nnvm.symbol.indicator
**Level 2: Convolutions**
...@@ -77,6 +81,8 @@ This level enables typical convnet models.
:nosignatures:
nnvm.symbol.reshape
nnvm.symbol.reshape_like
nnvm.symbol.expand_like
nnvm.symbol.copy
nnvm.symbol.negative
nnvm.symbol.leaky_relu
...@@ -107,6 +113,7 @@ This level enables typical convnet models.
Detailed Definitions
--------------------
.. autofunction:: nnvm.symbol.dense
.. autofunction:: nnvm.symbol.matmul
.. autofunction:: nnvm.symbol.relu
.. autofunction:: nnvm.symbol.tanh
.. autofunction:: nnvm.symbol.sigmoid
...@@ -117,6 +124,7 @@ Detailed Definitions
.. autofunction:: nnvm.symbol.elemwise_sub
.. autofunction:: nnvm.symbol.elemwise_mul
.. autofunction:: nnvm.symbol.elemwise_div
.. autofunction:: nnvm.symbol.elemwise_sum
.. autofunction:: nnvm.symbol.full
.. autofunction:: nnvm.symbol.full_like
.. autofunction:: nnvm.symbol.ones
...@@ -133,6 +141,8 @@ Detailed Definitions
.. autofunction:: nnvm.symbol.softmax
.. autofunction:: nnvm.symbol.log_softmax
.. autofunction:: nnvm.symbol.pad
.. autofunction:: nnvm.symbol.block_grad
.. autofunction:: nnvm.symbol.indicator
.. autofunction:: nnvm.symbol.conv2d
.. autofunction:: nnvm.symbol.conv2d_transpose
...@@ -142,6 +152,8 @@ Detailed Definitions
.. autofunction:: nnvm.symbol.global_avg_pool2d
.. autofunction:: nnvm.symbol.reshape
.. autofunction:: nnvm.symbol.reshape_like
.. autofunction:: nnvm.symbol.expand_like
.. autofunction:: nnvm.symbol.copy
.. autofunction:: nnvm.symbol.negative
.. autofunction:: nnvm.symbol.leaky_relu
...
...@@ -62,6 +62,13 @@ enum TypeFlag {
kUint64 = 10,
};
enum IndicatorRuleFlag {
kGT0 = 0,
kLT0 = 1,
kMax = 2,
kMin = 3,
};
#define DMLC_DECLARE_DTYPE_FIELD(name) \
DMLC_DECLARE_FIELD(name) \
.add_enum("float16", kFloat16) \
...@@ -84,6 +91,28 @@ struct CastParam : public dmlc::Parameter<CastParam> {
}
};
struct IndicatorParam : public dmlc::Parameter<IndicatorParam> {
TShape axis;
bool exclude;
DMLC_DECLARE_PARAMETER(IndicatorParam) {
DMLC_DECLARE_FIELD(axis).set_default(TShape())
.describe(R"code(The axis or axes along which to perform the indicator rule.
The default, `axis=()`, will compute over all elements into a
scalar array with shape `(1,)`.
If `axis` is int, rule is applied on a particular axis.
If `axis` is a tuple of ints, rule is applied on all the axes
specified in the tuple.
If `exclude` is true, rule will be applied on the axes that are
NOT in axis instead.)code");
DMLC_DECLARE_FIELD(exclude).set_default(false)
.describe("Whether to apply rule on axis that are NOT in axis instead.");
}
};
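The axis/exclude semantics above drive the _max_mask and _min_mask indicator ops registered later in this commit. A small NumPy sketch of the kMax rule (editor's illustration, not part of the commit; the axis choice is arbitrary):
import numpy as np
x = np.random.randn(2, 3, 4)
# 1.0 where an element is the maximum over axis 1, otherwise 0.0 (exclude=False)
mask = (x == x.max(axis=1, keepdims=True)).astype("float32")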
struct ReshapeParam : public dmlc::Parameter<ReshapeParam> {
Tuple<int64_t> shape;
...@@ -97,8 +126,7 @@ struct SqueezeParam : public dmlc::Parameter<SqueezeParam> {
DMLC_DECLARE_PARAMETER(SqueezeParam) {
DMLC_DECLARE_FIELD(axis).set_default(TShape())
.describe("The axis to squeeze in the input tensor.");
}
};
...@@ -110,6 +138,15 @@ struct ScalarParam : public dmlc::Parameter<ScalarParam> {
}
};
struct FillValueParam : public dmlc::Parameter<FillValueParam> {
double fill_value;
DMLC_DECLARE_PARAMETER(FillValueParam) {
DMLC_DECLARE_FIELD(fill_value)
.describe("Scalar value to be filled");
}
};
struct TransposeParam : public dmlc::Parameter<TransposeParam> {
TShape axes;
...@@ -158,16 +195,49 @@ struct ReduceParam : public dmlc::Parameter<ReduceParam> {
}
};
struct InitOpWithScalarParam : public dmlc::Parameter<InitOpWithScalarParam> {
TShape shape;
int dtype;
double fill_value;
DMLC_DECLARE_PARAMETER(InitOpWithScalarParam) {
DMLC_DECLARE_FIELD(shape).set_default(TShape());
DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32)
.describe("Target data type.");
DMLC_DECLARE_FIELD(fill_value).describe("Scalar value to fill");
}
};
struct InitOpParam : public dmlc::Parameter<InitOpParam> {
TShape shape;
int dtype;
DMLC_DECLARE_PARAMETER(InitOpParam) {
DMLC_DECLARE_FIELD(shape).set_default(TShape());
DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32)
.describe("Target data type.");
}
};
struct ElementWiseReduceParam : public dmlc::Parameter<ElementWiseReduceParam> {
int num_args;
DMLC_DECLARE_PARAMETER(ElementWiseReduceParam) {
DMLC_DECLARE_FIELD(num_args).set_lower_bound(1)
.describe("Number of inputs to be reduced.");
}
};
struct MatMulParam : public dmlc::Parameter<MatMulParam> {
bool transpose_a;
bool transpose_b;
DMLC_DECLARE_PARAMETER(MatMulParam) {
DMLC_DECLARE_FIELD(transpose_a)
.describe("If true then transpose the first input before dot.")
.set_default(false);
DMLC_DECLARE_FIELD(transpose_b)
.describe("If true then transpose the second input before dot.")
.set_default(false);
}
};
...
...@@ -188,7 +188,7 @@ def build(graph, target=None, shape=None, dtype="float32", params=None, target_h
The input types to the graph
params : dict of str to NDArray
Input parameters to the graph that do not change
during inference time. Used for pre-compute
folding optimization.
...
...@@ -5,6 +5,9 @@ from __future__ import absolute_import as _abs
import tvm
from . import graph_attr
from ..graph import create
from ..symbol import Group, ones_like
def infer_shape(graph, **shape):
"""Infer the shape given the shape of inputs.
...@@ -89,3 +92,57 @@ def check_graph_equal(grapha, graphb, compare_variable_attrs=False):
err = _deep_compare(grapha, graphb, compare_variable_attrs)
if err:
raise ValueError("Graph compare error: " + err)
def get_gradient_graph(ys, xs, grad_ys=None):
"""Create gradient graph of ys with respect to xs.
Parameters
----------
ys : Symbol or list of Symbol
Symbols from which the gradient is calculated.
xs : Symbol or list of Symbol
Symbols the gradient is computed with respect to.
For group symbol, gradients for all outputs will be calculated.
grad_ys : Symbol or list of Symbol
Head gradients for ys.
Returns
-------
ret : Graph
Generated gradient graph.
"""
if isinstance(ys, list):
ys = Group(ys)
g = create(ys)
g._set_symbol_list_attr('grad_ys', ys)
g._set_symbol_list_attr('grad_xs', xs)
ny = len(ys.list_output_names())
if grad_ys is None:
grad_ys = [ones_like(ys[i]) for i in range(ny)]
g._set_symbol_list_attr('grad_ys_out_grad', grad_ys)
return g.apply('Gradient')
def gradients(ys, xs, grad_ys=None):
"""Create gradient symbol of ys respect to xs.
Parameters
----------
ys : Symbol or list of Symbol
Symbols from which the gradient is calculated.
xs : Symbol or list of Symbol
Symbols the gradient is computed with respect to.
For group symbol, gradients for all outputs will be calculated.
grad_ys : Symbol or list of Symbol
Head gradients for ys.
Returns
-------
ret : list of Symbol
Generated gradient symbol. For each xs,
all gradients from ys are merged into a single symbol.
"""
grad_g = get_gradient_graph(ys, xs, grad_ys)
nx = len(Group(xs).list_output_names()) \
if isinstance(xs, list) else len(xs.list_output_names())
ret = [grad_g.symbol[i] for i in range(nx)]
return ret
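For orientation, a minimal usage sketch of these helpers (editor's illustration, mirroring the unit tests later in this commit):
# Differentiate z1 = x + sqrt(y) and z2 = log(x) with respect to x and y.
import nnvm.symbol as sym
from nnvm.compiler import graph_util
x = sym.Variable("x")
y = sym.Variable("y")
z1 = sym.elemwise_add(x, sym.sqrt(y))
z2 = sym.log(x)
# one merged gradient symbol per entry of xs
grads = graph_util.gradients([z1, z2], [x, y])
assert len(grads) == 2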
...@@ -13,7 +13,6 @@ from ._base import c_array, c_str, nn_uint, py_str, string_types
from ._base import GraphHandle, SymbolHandle
from ._base import check_call
from .symbol import Variable, Symbol, Group as _Group
from .symbol import ones_like
class GraphIndex(object):
"""Index for quickly accessing graph attributes.
...@@ -271,38 +270,3 @@ def create(symbol):
check_call(_LIB.NNGraphCreate(
symbol.handle, ctypes.byref(ghandle)))
return Graph(ghandle)
def gradients(ys, xs, grad_ys=None):
"""Create gradient symbol of ys respect to xs.
Parameters
----------
ys : Symbol or list of Symbol
Symbols from which the gradient is calculated.
xs : Symbol or list of Symbol
Symbols the gradient respect to.
For group symbol, gradients for all outputs will be calculated.
grad_ys : Symbol or list of Symbol
Head gradients for ys.
Returns
-------
ret : list of Symbol
Generated gradient symbol. For each xs,
all gradients from ys are merged into a single symbol.
"""
if isinstance(ys, list):
ys = _Group(ys)
g = create(ys)
g._set_symbol_list_attr('grad_ys', ys)
g._set_symbol_list_attr('grad_xs', xs)
ny = len(ys.list_output_names())
if grad_ys is None:
grad_ys = [ones_like(ys[i]) for i in range(ny)]
g._set_symbol_list_attr('grad_ys_out_grad', grad_ys)
sym = g.apply('Gradient').symbol
nx = len(_Group(xs).list_output_names()) \
if isinstance(xs, list) else len(xs.list_output_names())
ret = [sym[i] for i in range(nx)]
return ret
...@@ -14,18 +14,23 @@ namespace pass {
namespace {
// default aggregate gradient function
// require operator zeros and elemwise_sum to be presented.
NodeEntry DefaultAggregateGradient(std::vector<NodeEntry>&& v) {
if (v.size() == 1) {
return std::move(v[0]);
} else if (v.size() == 0) {
NodePtr zero_node = Node::Create();
zero_node->attrs.op = Op::Get("zeros");
zero_node->attrs.name = "zero_grad";
zero_node->attrs.op->attr_parser(&(zero_node->attrs));
return NodeEntry{zero_node, 0, 0};
} else {
NodePtr sum_node = Node::Create();
sum_node->attrs.op = Op::Get("elemwise_sum");
sum_node->inputs = std::move(v);
sum_node->attrs.name = "grad_sum";
sum_node->attrs.dict["num_args"] = std::to_string(sum_node->inputs.size());
sum_node->attrs.op->attr_parser(&(sum_node->attrs));
return NodeEntry{sum_node, 0, 0};
}
}
...
...@@ -84,6 +84,22 @@ inline bool ElemwiseType(const NodeAttrs& attrs,
attrs, in_attrs, out_attrs, -1);
}
inline bool ElementWiseReduceShape(const NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<TShape, shape_is_none, shape_assign, true, shape_string>(
attrs, in_attrs, out_attrs, TShape());
}
inline bool ElementWiseReduceType(const NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<int, type_is_none, type_assign, true, type_string>(
attrs, in_attrs, out_attrs, -1);
}
#define NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(1) \
...@@ -100,11 +116,13 @@ inline bool ElemwiseType(const NodeAttrs& attrs,
#define NNVM_REGISTER_INIT_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(0) \
.set_num_outputs(1)
#define NNVM_REGISTER_INIT_LIKE_OP(name) \
NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \
.set_attr<FGradient>("FGradient", MakeZeroGradNodes) \
.add_argument("data", "Symbol", "The input")
#define NNVM_REGISTER_ELEMWISE_BINARY_OP(name) \
...@@ -120,6 +138,41 @@ inline bool ElemwiseType(const NodeAttrs& attrs,
.add_argument("lhs", "Tensor", "first input") \
.add_argument("rhs", "Tensor", "second input")
#define NNVM_REGISTER_ELEMWISE_REDUCE_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs([](const NodeAttrs& attrs) { \
return static_cast<uint32_t>( \
dmlc::get<ElementWiseReduceParam>(attrs.parsed).num_args); \
}) \
.set_attr_parser(ParamParser<ElementWiseReduceParam>) \
.set_attr<FGetAttrDict>("FGetAttrDict", \
ParamGetAttrDict<ElementWiseReduceParam>) \
.set_attr<nnvm::FInferShape>("FInferShape", \
ElementWiseReduceShape) \
.set_attr<nnvm::FInferType>("FInferType", ElementWiseReduceType) \
.add_argument("args", "Symbol[]", "Positional input arguments")
#define NNVM_REGISTER_INDICATOR_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_outputs(1) \
.set_attr<FInferType>( \
"FInferType", [](const NodeAttrs& attrs, \
std::vector<int>* in_attrs, \
std::vector<int>* out_attrs) { \
CHECK_EQ(out_attrs->size(), 1U); \
NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, \
static_cast<int>(kFloat32)); \
return true; \
}) \
.set_attr<FGradient>( \
"FGradient", [](const NodePtr& n, \
const std::vector<NodeEntry>& ograds) { \
return MakeZeroGradNodes(n, ograds); \
})
} // namespace top
} // namespace nnvm
#endif  // NNVM_TOP_ELEMWISE_OP_COMMON_H_
...@@ -120,7 +120,42 @@ a bias vector is created and added to the outputs.
.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
.set_num_outputs(1)
.set_num_inputs(UseBiasNumInputs<Conv2DParam>)
.set_support_level(2)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return MakeGradNode("_conv2d_grad", n,
{ograds[0], n->inputs[Conv2DParam::kData],
n->inputs[Conv2DParam::kWeight]},
n->attrs.dict);
});
NNVM_REGISTER_OP(_conv2d_grad)
.describe(R"code(2D convolution grad.
)code" NNVM_ADD_FILELINE)
.add_argument("ograd", "4D Tensor", "Output grad.")
.add_argument("data", "4D Tensor", "Input data of conv2d.")
.add_argument("weight", "4D Tensor", "Input weight.")
.set_num_inputs(3)
.set_num_outputs(UseBiasNumInputs<Conv2DParam>)
.set_attr<FListOutputNames>("FListOutputNames", UseBiasListInputNames<Conv2DParam>)
.set_attr_parser(ParamParser<Conv2DParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Conv2DParam>)
.set_attr<FInferShape>(
"FInferShape", [](const nnvm::NodeAttrs& attrs,
std::vector<TShape>* in_attrs,
std::vector<TShape>* out_attrs) {
const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kData, in_attrs->at(1));
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kWeight, in_attrs->at(2));
if (param.use_bias) {
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kBias, TShape({param.channels}));
}
return true;
})
.set_attr<FInferType>("FInferType", ElemwiseType<3, -1>)
.set_attr<TIsBackward>("TIsBackward", true);
DMLC_REGISTER_PARAMETER(Conv2DTransposeParam);
...
...@@ -54,7 +54,7 @@ NNVM_REGISTER_OP(dense)
- **data**: `(x1, x2, ..., xn, input_dim)`
- **weight**: `(units, input_dim)`
- **bias**: `(units,)`
- **out**: `(x1, x2, ..., xn, units)`
The learnable parameters include both ``weight`` and ``bias``.
...@@ -72,6 +72,34 @@ If ``use_bias`` is set to be false, then the ``bias`` term is ignored.
.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<DenseParam>)
.set_attr<FInferShape>("FInferShape", DenseInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
const DenseParam& param = nnvm::get<DenseParam>(n->attrs.parsed);
NodeEntry data_grad = MakeNode("matmul",
n->attrs.name + "_data_grad",
{ograds[0], n->inputs[DenseParam::kWeight]});
NodeEntry w_grad_sub = MakeNode("matmul",
n->attrs.name + "_weight_grad_sub0",
{ograds[0], n->inputs[DenseParam::kData]},
{{"transpose_a", "true"}});
TShape w_reduce_axis = {0, -1};
std::ostringstream w_oss; w_oss << w_reduce_axis;
NodeEntry w_grad = MakeNode("sum", n->attrs.name + "_weight_grad",
{w_grad_sub},
{{"axis", w_oss.str()}, {"exclude", "true"}});
std::vector<NodeEntry> grads = {data_grad, w_grad};
if (param.use_bias) {
TShape axis = {-1};
std::ostringstream b_oss; b_oss << axis;
grads.push_back(MakeNode("sum", n->attrs.name + "_bias_grad",
{ograds[0]},
{{"axis", b_oss.str()}, {"exclude", "true"}}));
}
return grads;
})
.set_support_level(1);
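As a cross-check on the dense gradient registered above, a small NumPy sketch of the same formulas (editor's illustration, not part of the commit; it assumes 2-D data and use_bias=true):
import numpy as np
x = np.random.randn(4, 3)        # data: (batch, input_dim)
w = np.random.randn(5, 3)        # weight: (units, input_dim)
ograd = np.random.randn(4, 5)    # gradient w.r.t. the dense output
data_grad = ograd.dot(w)         # matmul(ograd, weight)            -> (4, 3)
weight_grad = ograd.T.dot(x)     # matmul(ograd, data, transpose_a) -> (5, 3)
bias_grad = ograd.sum(axis=0)    # sum over all axes except the last -> (5,)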
// relu
...@@ -82,6 +110,18 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(relu)
max(input, 0)
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = relu(x)
// grad = indicator(x > 0)
NodeEntry zero = MakeNode("zeros_like", n->attrs.name + "_grad_zero",
{n->inputs[0]});
return std::vector<NodeEntry>{
MakeNode("greater", n->attrs.name + "_grad",
{n->inputs[0], zero}, {{"exclude", "true"}})
};
})
.set_support_level(1);
// dropout
...@@ -217,7 +257,37 @@ NNVM_REGISTER_OP(softmax)
.set_num_outputs(1)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// grad_x = grad_y dot jacobian of softmax
//
// jacobian of softmax
// [-y1y1 + y1, -y1y2, ... ]
// [ ... , -y2y2 + y2, ... ]
// [ ... ... ]
// [ ... ,-ynyn + yn]
//
// grad_x =
// [-y1*(ograd1*y1 - 1 + ograd2*y2 + ..., -y2*(ograd1*y1 - 1 + ograd2*y2, ..., ...]]
// grad_x = ograd elemwise_mul output
// grad_x = sum(grad_x, keepdim, axis)
// grad_x = grad_x broadcast_mul output
// grad_x = neg grad_x
// grad_x = grad_x + output
const SoftmaxParam& param = nnvm::get<SoftmaxParam>(n->attrs.parsed);
NodeEntry output = NodeEntry{n, 0, 0};
NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub0", {ograds[0], output});
NodeEntry sub1 = MakeNode("sum", n->attrs.name + "_grad_sub1", {sub0},
{{"axis", std::to_string(param.axis)}, {"keepdims", "true"}});
NodeEntry sub2 = MakeNode("broadcast_mul", n->attrs.name + "_grad_sub2", {sub1, output});
NodeEntry sub3 = MakeNode("negative", n->attrs.name + "_grad_sub3", {sub2});
return std::vector<NodeEntry> {
MakeNode("elemwise_add", n->attrs.name + "_grad", {sub3, output})
};
});
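For reference, the softmax Jacobian-vector product that the comment above walks through, written out in NumPy (editor's illustration of the mathematical identity, not of the registered node graph):
import numpy as np
x = np.random.randn(2, 5)
y = np.exp(x - x.max(axis=1, keepdims=True))
y /= y.sum(axis=1, keepdims=True)            # softmax(x) along axis 1
ograd = np.random.randn(2, 5)
# J^T * ograd with J = diag(y) - y y^T, applied row-wise
grad_x = y * (ograd - (ograd * y).sum(axis=1, keepdims=True))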
// log_softmax
NNVM_REGISTER_OP(log_softmax)
...@@ -236,6 +306,38 @@ NNVM_REGISTER_OP(log_softmax)
.set_num_outputs(1)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// grad_x = grad_y dot jacobian of softmax
//
// jacobian of softmax
// [-y1 + 1, -y2, ... ]
// [ ... , -y2 + 1, ... ]
// [ ... ... ]
// [ ... ,-yn + 1]
//
// grad_x =
// [-(ograd1*y1 - 1 + ograd2*y2 + ..., -(ograd1*y1 - 1 + ograd2*y2, ..., ...]]
// grad_x = ograd elemwise_mul output
// grad_x = sum(grad_x, keepdim, axis)
// grad_x = neg grad_x
// grad_x = grad_x + ones_like(grad_x)
// grad_x = expand_dims(grad_x, axis)
const SoftmaxParam& param = nnvm::get<SoftmaxParam>(n->attrs.parsed);
NodeEntry output = NodeEntry{n, 0, 0};
NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub0", {ograds[0], output});
NodeEntry sub1 = MakeNode("sum", n->attrs.name + "_grad_sub1", {sub0},
{{"axis", std::to_string(param.axis)}, {"keepdims", "true"}});
NodeEntry sub2 = MakeNode("negative", n->attrs.name + "_grad_sub2", {sub1});
NodeEntry sub3 = MakeNode("ones_like", n->attrs.name + "_grad_sub3", {sub2});
NodeEntry sub4 = MakeNode("elemwise_add", n->attrs.name + "_grad_sub4", {sub2, sub3});
return std::vector<NodeEntry> {
MakeNode("expand_like", n->attrs.name + "_grad", {sub4, output},
{{"axis", std::to_string(param.axis)}})
};
})
.set_support_level(1);
// leaky_rlu
...@@ -255,6 +357,25 @@ NNVM_REGISTER_OP(leaky_relu)
.set_num_outputs(1)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = leak_relu(x)
// grad = indicator(x > 0) + alpha * indicator(x < 0)
const LeakyReLUParam& param = nnvm::get<LeakyReLUParam>(n->attrs.parsed);
NodeEntry zero = MakeNode("zeros_like", n->attrs.name + "_grad_zero",
{n->inputs[0]});
NodeEntry sub0 = MakeNode("greater", n->attrs.name + "_pos_grad",
{n->inputs[0], zero}, {{"exclude", "true"}});
NodeEntry sub1 = MakeNode("less", n->attrs.name + "_neg_grad",
{n->inputs[0], zero}, {{"exclude", "true"}});
NodeEntry sub2 = MakeNode("__mul_scalar__", n->attrs.name + "_neg_mul_2",
{sub1},
{{"scalar", std::to_string(param.alpha)}});
return std::vector<NodeEntry>{
MakeNode("elemwise_add", n->attrs.name + "_add_grad", {sub0, sub2})
};
})
.set_support_level(1);
...
...@@ -77,8 +77,30 @@ NNVM_REGISTER_OP(max_pool2d)
.set_num_inputs(1)
.set_attr<FInferShape>("FInferShape", Pool2DInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return MakeGradNode("_max_pool2d_grad", n,
{ograds[0], n->inputs[0], NodeEntry{n, 0, 0}},
n->attrs.dict);
})
.set_support_level(2);
NNVM_REGISTER_OP(_max_pool2d_grad)
.describe(R"code(Max pooling 2D grad.
)code" NNVM_ADD_FILELINE)
.add_argument("ograd", "4D Tensor", "Output grad.")
.add_argument("input", "4D Tensor", "Input data of max_pool2d grad.")
.add_argument("output", "4D Tensor", "Output data of max_pool2d grad.")
.set_num_inputs(3)
.set_num_outputs(1)
.set_attr_parser(ParamParser<Pool2DParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Pool2DParam>)
.set_attr<FInferShape>("FInferShape", AssignOutputAttr<TShape, 1, 0>)
.set_attr<FInferType>("FInferType", ElemwiseType<3, 1>)
.set_attr<TIsBackward>("TIsBackward", true);
NNVM_REGISTER_OP(avg_pool2d)
.describe(R"code(Average pooling operation for one dimensional data.
...
...@@ -144,7 +144,7 @@ inline std::string attr_assign_error_msg(const NodeAttrs& attrs,
}
/*!
* \brief macro assign shape to input if out is unknown otherwise check consistency
* Use macro so we can see the error file more clearly
* \param inputs the shape array to store the result
* \param index the index of in the array
...@@ -240,10 +240,11 @@ inline bool SameShape(const NodeAttrs& attrs,
}
// return shape from node attrs
template<typename PType>
inline bool ZeroShape(const NodeAttrs& attrs,
std::vector<TShape> *ishape,
std::vector<TShape> *oshape) {
const TShape& ts = dmlc::get<PType>(attrs.parsed).shape;
if (ts.ndim() != 0) {
SHAPE_ASSIGN(oshape->at(0), ts);
return true;
...@@ -252,15 +253,63 @@ inline bool ZeroShape(const NodeAttrs& attrs,
}
}
// simply assign output shape or type from input
template<typename AttrType, int in_index, int out_index>
inline bool AssignOutputAttr(const NodeAttrs& attrs,
std::vector<AttrType> *in_attrs,
std::vector<AttrType> *out_attrs) {
CHECK_LT(in_index, in_attrs->size());
CHECK_LT(out_index, out_attrs->size());
const TShape &dshape = in_attrs->at(in_index);
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, out_index, dshape);
return true;
}
// return type from node attrs
template<typename PType>
inline bool ZeroType(const NodeAttrs& attrs,
std::vector<int> *iattr,
std::vector<int> *oattr) {
int dtype = dmlc::get<PType>(attrs.parsed).dtype;
DTYPE_ASSIGN(oattr->at(0), dtype);
return true;
}
// Make zero grad node
inline std::vector<NodeEntry> MakeZeroGradNodes(
const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
std::vector<NodeEntry> ret;
for (uint32_t i = 0; i < n->num_inputs(); ++i) {
std::ostringstream os;
ret.push_back(MakeNode("zeros_like", n->attrs.name + "_zero_grad",
{n->inputs[i]}));
}
return ret;
}
// Helper to make gradient node
inline std::vector<NodeEntry> MakeGradNode(
const char* op_name,
const NodePtr& n,
std::vector<NodeEntry> inputs,
std::unordered_map<std::string, std::string> attr = {}) {
NodePtr p = Node::Create();
p->attrs.op = nnvm::Op::Get(op_name);
p->attrs.name = n->attrs.name + "_grad";
p->inputs = std::move(inputs);
p->attrs.dict = std::move(attr);
if (p->attrs.op->attr_parser) {
p->attrs.op->attr_parser(&p->attrs);
}
std::vector<NodeEntry> ret;
for (uint32_t i = 0; i < p->num_outputs(); ++i) {
ret.emplace_back(NodeEntry{p, i, 0});
}
return ret;
}
} // namespace top
} // namespace nnvm
...
...@@ -241,73 +241,70 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(copy)
});
DMLC_REGISTER_PARAMETER(InitOpParam);
DMLC_REGISTER_PARAMETER(InitOpWithScalarParam);
DMLC_REGISTER_PARAMETER(FillValueParam);
// full
NNVM_REGISTER_INIT_OP(full)
.describe(R"code(Fill array with scalar value
)code" NNVM_ADD_FILELINE)
.set_attr_parser(ParamParser<InitOpWithScalarParam>)
.set_attr<FGetAttrDict>(
"FGetAttrDict", ParamGetAttrDict<InitOpWithScalarParam>)
.add_arguments(InitOpWithScalarParam::__FIELDS__())
.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpWithScalarParam>)
.set_attr<FInferType>("FInferType", ZeroType<InitOpWithScalarParam>)
.set_support_level(1);
NNVM_REGISTER_INIT_OP(zeros)
.describe(R"code(Fill target with zeros
)code" NNVM_ADD_FILELINE)
.set_attr_parser(ParamParser<InitOpParam>)
.set_attr<FGetAttrDict>(
"FGetAttrDict", ParamGetAttrDict<InitOpParam>)
.add_arguments(InitOpParam::__FIELDS__())
.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpParam>)
.set_attr<FInferType>("FInferType", ZeroType<InitOpParam>)
.set_support_level(1);
NNVM_REGISTER_INIT_OP(ones)
.describe(R"code(Fill target with ones
)code" NNVM_ADD_FILELINE)
.set_attr_parser(ParamParser<InitOpParam>)
.set_attr<FGetAttrDict>(
"FGetAttrDict", ParamGetAttrDict<InitOpParam>)
.add_arguments(InitOpParam::__FIELDS__())
.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpParam>)
.set_attr<FInferType>("FInferType", ZeroType<InitOpParam>)
.set_support_level(1);
// full_like
NNVM_REGISTER_INIT_LIKE_OP(full_like)
.describe(R"code(Return an scalar value array with the same shape and type
as the input array
)code" NNVM_ADD_FILELINE)
.add_arguments(FillValueParam::__FIELDS__())
.set_attr_parser(ParamParser<FillValueParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<FillValueParam>)
.set_support_level(1);
NNVM_REGISTER_INIT_LIKE_OP(zeros_like)
.describe(R"code(Return an array of zeros with the same shape and type
as the input array.
)code")
.set_support_level(1);
NNVM_REGISTER_INIT_LIKE_OP(ones_like)
.describe(R"code(Return an array of ones with the same shape and type
as the input array.
)code")
.set_support_level(1);
// unary scalar op
DMLC_REGISTER_PARAMETER(ScalarParam);
...@@ -452,64 +449,84 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__)
};
});
DMLC_REGISTER_PARAMETER(ElementWiseReduceParam);
struct ElementWiseSumParam : public dmlc::Parameter<ElementWiseSumParam> {
int num_args;
DMLC_DECLARE_PARAMETER(ElementWiseSumParam) {
DMLC_DECLARE_FIELD(num_args).set_lower_bound(1)
.describe("Number of inputs to be summed.");
}
};
DMLC_REGISTER_PARAMETER(ElementWiseSumParam);
bool ElementWiseSumShape(const NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<TShape, shape_is_none, shape_assign, true, shape_string>(
attrs, in_attrs, out_attrs, TShape());
}
bool ElementWiseSumType(const NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<int, type_is_none, type_assign, true, type_string>(
attrs, in_attrs, out_attrs, -1);
}
std::vector<NodeEntry> ElementWiseSumGrad(
const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// identity constraints in the beginning for easier shape inference.
const Op* copy_op = Op::Get("identity");
CHECK_EQ(ograds.size(), 1);
std::vector<NodeEntry> ret;
NodeEntry n_out{n, 0, 0};
for (size_t i = 0; i < n->inputs.size(); i++) {
NodePtr id_node = Node::Create();
id_node->attrs.op = copy_op;
id_node->inputs = {ograds[0]};
ret.push_back(NodeEntry{id_node, 0, 0});
}
return ret;
}
NNVM_REGISTER_ELEMWISE_REDUCE_OP(elemwise_sum)
.describe(R"code(Adds all input arguments element-wise.
)code" NNVM_ADD_FILELINE)
.set_attr<nnvm::FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
CHECK_EQ(ograds.size(), 1);
std::vector<NodeEntry> ret;
for (size_t i = 0; i < n->inputs.size(); i++) {
ret.push_back(ograds[0]);
}
return ret;
});
NNVM_REGISTER_ELEMWISE_UNARY_OP(block_grad)
.describe(R"code(Blocks gradient computation for input.
)code" NNVM_ADD_FILELINE)
.set_attr<nnvm::FInplaceIdentity>(
"FInplaceIdentity", [](const NodeAttrs& attrs){
return std::vector<bool>{true};
})
.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
DMLC_REGISTER_PARAMETER(IndicatorParam);
// indicator function
NNVM_REGISTER_INDICATOR_OP(greater)
.describe(R"code(Greater function that returns a mask tensor
with 1.0 if (left > right), otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("lhs", "Tensor", "First input")
.add_argument("rhs", "Tensor", "Second input")
.set_num_inputs(2)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
.set_support_level(1);
NNVM_REGISTER_INDICATOR_OP(less)
.describe(R"code(Less function that returns a mask tensor
with 1.0 if (left < right), otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("lhs", "Tensor", "First input")
.add_argument("rhs", "Tensor", "Second input")
.set_num_inputs(2)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
.set_support_level(1);
NNVM_REGISTER_INDICATOR_OP(_max_mask)
.describe(R"code(Function that returns a mask tensor
with 1.0 if the value is maximum over given axes, otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input")
.set_num_inputs(1)
.add_arguments(IndicatorParam::__FIELDS__())
.set_attr_parser(ParamParser<IndicatorParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<IndicatorParam>)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_support_level(1);
NNVM_REGISTER_INDICATOR_OP(_min_mask)
.describe(R"code(Function that returns a mask tensor
with 1.0 if the value is minimum over given axes, otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input")
.set_num_inputs(1)
.add_arguments(IndicatorParam::__FIELDS__())
.set_attr_parser(ParamParser<IndicatorParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<IndicatorParam>)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_support_level(1);
} // namespace top
} // namespace nnvm
/*!
* Copyright (c) 2017 by Contributors
* \file matrix_op.cc
* \brief Matrix operators
*/
#include <nnvm/op.h>
#include <nnvm/node.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/top/tensor.h>
#include "../op_common.h"
#include "../elemwise_op_common.h"
namespace nnvm {
namespace top {
DMLC_REGISTER_PARAMETER(MatMulParam);
inline bool DotShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
const MatMulParam& param = nnvm::get<MatMulParam>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 2U);
CHECK_EQ(out_attrs->size(), 1U);
TShape lshape = (*in_attrs)[0];
TShape rshape = (*in_attrs)[1];
if (lshape.ndim() == 1) lshape = TShape{1, lshape[0]};
if (rshape.ndim() == 1) rshape = TShape{1, rshape[0]};
if (param.transpose_a) std::reverse(lshape.begin(), lshape.end());
if (param.transpose_b) std::reverse(rshape.begin(), rshape.end());
CHECK_EQ(lshape[lshape.ndim() - 1], rshape[0])
<< "dot shape inconsistent: " << lshape << " X " << rshape;
TShape oshape(lshape.ndim() + rshape.ndim() - 1);
for (int i = 0; i < lshape.ndim() - 1; i++) oshape[i] = lshape[i];
for (int i = 1; i < rshape.ndim(); i++) oshape[i + lshape.ndim() - 1] = rshape[i];
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape);
return true;
}
NNVM_REGISTER_OP(matmul)
.describe(R"doc(Matrix multiplication of two arrays.
``dot``'s behavior depends on the input array dimensions:
- 1-D arrays: inner product of vectors
- 2-D arrays: matrix multiplication
- N-D arrays: a sum product over the last axis of the first input and the first
axis of the second input
For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the
result array will have shape `(n,m,r,s)`. It is computed by::
dot(x,y) = sum(x[i,j,:]*y[:,a,b])
)doc" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr_parser(ParamParser<MatMulParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MatMulParam>)
.add_arguments(MatMulParam::__FIELDS__())
.add_argument("lhs", "NDArray-or-Symbol", "The first input")
.add_argument("rhs", "NDArray-or-Symbol", "The second input")
.set_attr<FInferShape>("FInferShape", DotShape)
.set_attr<FInferType>("FInferType", ElemwiseType<2, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// z = x dot y
// xshape (n,m,k), yshape (k,r,s)
const MatMulParam& param = nnvm::get<MatMulParam>(n->attrs.parsed);
bool Ta = param.transpose_a;
bool Tb = param.transpose_b;
// Ta = false, Tb = false
// grad_x = grad_z dot y.T
// grad_y = x.T dot grad_z
if (!Ta && !Tb) {
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{ograds[0], n->inputs[1]},
{{"transpose_a", "false"},
{"transpose_b", "true"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{n->inputs[0], ograds[0]},
{{"transpose_a", "true"},
{"transpose_b", "false"}})
};
} else if (Ta && !Tb) {
// Ta = true, Tb = false
// grad_x = y dot grad_z.T
// grad_y = x dot grad_z
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{n->inputs[1], ograds[0]},
{{"transpose_a", "false"},
{"transpose_b", "true"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{n->inputs[0], ograds[0]},
{{"transpose_a", "false"},
{"transpose_b", "false"}})
};
} else if (!Ta && Tb) {
// Ta = false, Tb = true
// grad_x = grad_z dot y
// grad_y = grad_z.T dot x
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{ograds[0], n->inputs[1]},
{{"transpose_a", "false"},
{"transpose_b", "false"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{ograds[0], n->inputs[0]},
{{"transpose_a", "true"},
{"transpose_b", "false"}})
};
} else {
// Ta = true, Tb = true
// grad_x = y.T dot grad_z.T
// grad_y = grad_z.T dot x.T
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{n->inputs[1], ograds[0]},
{{"transpose_a", "true"},
{"transpose_b", "true"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{ograds[0], n->inputs[0]},
{{"transpose_a", "true"},
{"transpose_b", "true"}})
};
}
});
} // namespace top
} // namespace nnvm
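A quick NumPy sanity check of the non-transposed case above (editor's illustration, not part of the commit): for z = matmul(x, y), grad_x = matmul(grad_z, y^T) and grad_y = matmul(x^T, grad_z).
import numpy as np
x = np.random.randn(3, 4)
y = np.random.randn(4, 2)
gz = np.random.randn(3, 2)   # upstream gradient for z = x.dot(y)
gx = gz.dot(y.T)             # matmul(ograd, y, transpose_b=true)
gy = x.T.dot(gz)             # matmul(x, ograd, transpose_a=true)
assert gx.shape == x.shape and gy.shape == y.shape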
...@@ -31,11 +31,19 @@ inline TShape ReduceShapeImpl(const TShape& ishape,
<< "Reduction axis " << axis[axis.ndim() - 1]
<< " Exceeds input dimensions " << ishape;
TShape in_axis = axis;
for (auto& i : in_axis) {
i = i < 0 ? i + ishape.ndim(): i;
CHECK_GE(i, 0) << "axis out of bounds in reduce operator";
CHECK_LT(i, ishape.ndim()) << "axis out of bounds in reduce operator";
}
std::sort(in_axis.begin(), in_axis.end());
if (keepdims) {
TShape oshape(ishape);
if (exclude) {
for (dim_t i = 0, j = 0; i < ishape.ndim(); ++i) {
if (j < in_axis.ndim() && i == in_axis[j]) {
++j;
continue;
}
...@@ -44,22 +52,22 @@ inline TShape ReduceShapeImpl(const TShape& ishape,
return oshape;
}
for (dim_t i = 0; i < in_axis.ndim(); ++i) {
oshape[in_axis[i]] = 1;
}
return oshape;
}
if (exclude) {
TShape oshape = TShape(in_axis.ndim());
for (dim_t i = 0; i < in_axis.ndim(); ++i) {
oshape[i] = ishape[in_axis[i]];
}
return oshape;
}
TShape oshape = TShape(std::max<dim_t>(1, ishape.ndim() - in_axis.ndim()));
for (dim_t i = 0, j = 0, k = 0; i < ishape.ndim(); ++i) {
if (j < in_axis.ndim() && i == in_axis[j]) {
++j;
continue;
}
...@@ -99,9 +107,7 @@ inline void AxesParamParser(nnvm::NodeAttrs* attrs) {
.set_attr<FInferShape>("FInferShape", ReduceShape) \
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_num_inputs(1) \
.set_num_outputs(1)
NNVM_REGISTER_REDUCE_OP(sum)
.describe(R"code(Computes the sum of array elements over given axes.
...@@ -120,17 +126,66 @@ Example::
sum(data, axis=[1,2])
[ 12. 19. 27.]
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis; axis << param.axis;
return std::vector<NodeEntry>{
MakeNode("expand_like", n->attrs.name + "_grad",
{ograds[0], n->inputs[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}})
};
});
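The sum gradient above relies on expand_like to broadcast the incoming gradient back to the input shape. A NumPy sketch of that behaviour (editor's illustration, reducing over axis 1 with keepdims=False):
import numpy as np
data = np.random.randn(4, 3, 2)
out = data.sum(axis=1)                     # forward: sum over axis 1 -> (4, 2)
ograd = np.random.randn(4, 2)
# expand_like(ograd, data, axis=1): re-insert the reduced axis and broadcast
grad = np.broadcast_to(np.expand_dims(ograd, 1), data.shape)
assert grad.shape == data.shape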
NNVM_REGISTER_REDUCE_OP(max)
.describe(R"code(Computes the max of array elements over given axes.
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis; axis << param.axis;
NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0",
{ograds[0], n->inputs[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}});
NodeEntry sub1 = MakeNode("_max_mask", n->attrs.name + "_grad_sub1",
{ograds[0]},
{{"axis", axis.str()},
{"exclude", std::to_string(param.exclude)}});
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1})
};
});
NNVM_REGISTER_REDUCE_OP(min)
.describe(R"code(Computes the min of array elements over given axes.
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis; axis << param.axis;
NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0",
{ograds[0], n->inputs[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}});
NodeEntry sub1 = MakeNode("_min_mask", n->attrs.name + "_grad_sub1",
{ograds[0]},
{{"axis", axis.str()},
{"exclude", std::to_string(param.exclude)}});
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1})
};
});
} // namespace top
...
...@@ -44,7 +44,7 @@ Example::
[4,5,6],
[7,8,9]
],
[ [1,2,3],
[4,5,6],
[7,8,9]
]],
...@@ -58,6 +58,12 @@ Example::
.set_attr<FInferShape>("FInferShape", FlattenInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.add_argument("data", "Tensor", "Input data.")
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return MakeGradNode("reshape_like", n,
{ograds[0], n->inputs[0]});
})
.set_support_level(1);
// concatenate
...@@ -172,8 +178,8 @@ inline bool ExpandDimsInferShape(const NodeAttrs& attrs,
NNVM_REGISTER_OP(expand_dims)
.describe(R"code(Inserts a new axis of size 1 into the array shape
For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1, num_newaxis=5)``
will return a new array with shape ``(2,5,3,4)``.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input tensor")
...@@ -184,6 +190,61 @@ will return a new array with shape ``(2,1,3,4)``.
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ExpandDimsParam& param = nnvm::get<ExpandDimsParam>(n->attrs.parsed);
return std::vector<NodeEntry> {
MakeNode("sum", n->attrs.name + "_grad", {ograds[0]},
{{"axis", std::to_string(param.axis)}})
};
})
.set_support_level(1);
NNVM_REGISTER_OP(expand_like)
.describe(R"code(Expand an input array with the shape of second array.
This operation can always be composed of unsqueezing and expanding dims.
Examples::
input = [ 12. 19. 27.]
input.shape = (3,)
new_shape_array = [[[1,2],[2,3],[1,3]],
[[1,4],[4,3],[5,2]],
[[7,1],[7,2],[7,3]]]
new_shape_array.shape = (3, 3, 2)
expand_like(input, [1,2], new_shape_array) =
[[[12,12],[12,12],[12,12]],
[[19,19],[19,19],[19,19]],
[[27,27],[27,27],[27,27]]]
)code" NNVM_ADD_FILELINE)
.add_argument("input", "Tensor", "Source input")
.add_argument("shape_like", "Tensor", "Input with new shape")
.add_arguments(ReduceParam::__FIELDS__())
.set_attr_parser(ParamParser<ReduceParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ReduceParam>)
.set_attr<nnvm::FInferShape>("FInferShape", AssignOutputAttr<TShape, 1, 0>)
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis;
axis << param.axis;
return std::vector<NodeEntry>{
MakeNode("sum", n->attrs.name + "_grad",
{ograds[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}})
};
})
.set_support_level(1);
// split
...@@ -383,7 +444,7 @@ NNVM_REGISTER_OP(reshape)
.describe(R"code(Reshapes the input array.
Given an array and a shape, this function returns a copy of the array in the new shape.
The shape is a tuple of integers such as (2,3,4). The size of the new shape should be same as the size of the input array.
Example::
...@@ -443,6 +504,46 @@ The significance of each is explained below:
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return std::vector<NodeEntry>{
MakeNode("reshape_like", n->attrs.name + "_grad",
{ograds[0], n->inputs[0]})
};
})
.set_support_level(3);
NNVM_REGISTER_OP(reshape_like)
.describe(R"code(Reshapes the input array by the size of another array.
For an input array with shape ``(d1, d2, ..., dk)``, `reshape_like` operation reshapes
the input array into an output array with the same shape as the second input array.
.. note::
Sizes for both array should be compatible.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input data.")
.add_argument("shape_like", "Tensor", "Input data.")
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FInferShape>(
"FInferShape", [](const NodeAttrs& attrs,
std::vector<TShape>* in_attrs,
std::vector<TShape>* out_attrs) {
CHECK_EQ(in_attrs->at(0).Size(), in_attrs->at(1).Size())
<< "Reshape inputs size should be compatible";
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, in_attrs->at(1));
return true;
})
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return MakeGradNode("reshape_like", n,
{ograds[0], n->inputs[0]});
})
.set_support_level(3);
// squeeze
...@@ -502,12 +603,14 @@ NNVM_REGISTER_OP(squeeze)
Examples::
x = [[[0], [1], [2]]]
x.shape = (1, 3, 1)
squeeze(x) = [0, 1, 2]
squeeze(x, 0) = [[0], [1], [2]]
squeeze(x, (0, 2)) = [0, 1, 2]
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Source input")
.add_arguments(SqueezeParam::__FIELDS__())
...@@ -517,6 +620,13 @@ Examples::
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return std::vector<NodeEntry>{
MakeNode("reshape_like", n->attrs.name + "_grad", {n->inputs[0]})
};
})
.set_support_level(1);
// tranpose
...@@ -584,7 +694,16 @@ Examples::
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_support_level(4)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
const TransposeParam& param = nnvm::get<TransposeParam>(n->attrs.parsed);
std::ostringstream oss; oss << param.axes;
return std::vector<NodeEntry>{
MakeNode("transpose", n->attrs.name + "_t", {ograds[0]}, {{"axes", oss.str()}})
};
});
} // namespace top
} // namespace nnvm
...@@ -13,7 +13,7 @@ TEST(Tuple, Basic) {
Tuple<int> z{1, 2, 3, 5, 6};
std::ostringstream os;
os << z;
CHECK_EQ(os.str(), "[1,2,3,5,6]");
std::istringstream is(os.str());
is >> y;
CHECK_EQ(x, y);
...
import json
import nnvm.symbol as sym
import nnvm.graph as graph
import nnvm.compiler.graph_util as graph_util
def test_json_pass():
x = sym.Variable('x')
...@@ -117,13 +118,13 @@ def test_gradient():
y = sym.Variable("y")
z1 = sym.elemwise_add(x, sym.sqrt(y))
z2 = sym.log(x)
gradient = graph_util.gradients([z1, z2], [x, y])
assert len(gradient) == 2
g1 = sym.Variable("g1")
g2 = sym.Variable("g2")
grad_ys = [g1, g2]
gradient = graph_util.gradients(sym.Group([z1, z2]),
sym.Group([x, y]), grad_ys=grad_ys)
g_graph = graph.create(sym.Group(gradient)).ir()
assert len(gradient) == 2
...
import nnvm.symbol as sym
from nnvm.compiler import graph_util
def test_cnn_gradients():
# input data
h = 128
w = 128
data_shape = (1000, 3, h, w)
data = sym.Variable('data', shape=data_shape, dtype=0)
# conv2d
num_channels = 64
kernel_size = 32
conv_w_shape = (num_channels, 3, kernel_size, kernel_size)
conv_b_shape = (num_channels,)
conv_w = sym.Variable('conv_w', shape=conv_w_shape)
conv_b = sym.Variable('conv_b', shape=conv_b_shape)
conv1 = sym.conv2d(data=data, weight=conv_w, bias=conv_b,
channels=num_channels, kernel_size=(kernel_size, kernel_size),
name='conv1')
# relu1
relu1 = sym.relu(data=conv1, name='relu1')
# max pooling
max_pooling1 = sym.max_pool2d(data=relu1, pool_size=(2, 2), name='max_pooling1')
# flatten
flatten1 = sym.flatten(data=max_pooling1)
# shape after flatten
flatten_out_shape = (h - kernel_size) * (w - kernel_size) * num_channels
# dense1
dense1_hidden_units = 100
dense1 = sym.dense(data=flatten1, name='dense1', units=dense1_hidden_units)
# relu2
relu2 = sym.relu(data=dense1, name='relu2')
# dense2
dense2_hidden_units = 10
dense2 = sym.dense(data=relu2, name='dense2', units=dense2_hidden_units)
# softmax
mlp = sym.softmax(data=dense2, name='softmax')
# fake non-sparse label
label = sym.full_like(mlp, fill_value=1)
# cross entropy loss
ce_loss = sym.sum(
sym.elemwise_mul(sym.log_softmax(dense2), label),
axis=1,
keepdims=True,
name="ce_loss")
# input variables:
# print grad_g.symbol.list_input_names()
# >> ['data', 'conv_w', 'conv_b',
# 'dense1_weight', 'dense1_bias',
# 'dense2_weight', 'dense2_bias']
# output gradient variables:
# print grad_g.symbol.list_output_names()
# >> ['conv1_grad_data', 'conv1_grad_weight', 'conv1_grad_bias',
# 'dense1_grad_weight', 'dense1_grad_bias',
# 'dense2_grad_weight', 'dense2_grad_bias']
grad_g = graph_util.get_gradient_graph(ce_loss, ce_loss.list_input_variables())
# infer shape
in_shapes, out_shapes = graph_util.infer_shape(grad_g)
# forward graph shape
assert in_shapes == [list(data_shape), list(conv_w_shape), list(conv_b_shape),
[dense1_hidden_units, flatten_out_shape], [dense1_hidden_units],
[dense2_hidden_units, dense1_hidden_units], [dense2_hidden_units]]
# input grads shape should be equal with input shape
assert in_shapes == out_shapes
# output grads w.r.t input variables
grads = graph_util.gradients(ce_loss, ce_loss.list_input_variables())
# gradients number should be equal with grad_input number
assert len(grads) == len(ce_loss.list_input_variables())
# infer type
in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
assert out_dtypes == ['float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32']
def test_multi_loss_graph_gradients():
# input data
shape1 = (1000, 100)
data1 = sym.Variable('data1', shape=(1000, 100), dtype=0)
# fake non-sparse label
label = sym.full(fill_value=3)
# square loss
sub1 = sym.elemwise_sub(data1, label, name="sub1")
square_loss = sym.sum(data=sub1**2, axis=1, name="square_loss")
# fake loss1
shape2 = (1000, )
data2 = sym.Variable('data2', shape=shape2, dtype=0)
loss1 = sym.sqrt(data2, name="loss1")
# fake loss2
loss2 = sym.relu(data1, name='loss2')
# block loss1
total_loss = sym.elemwise_sum(
sym.block_grad(loss1),
square_loss,
num_args=2,
name="total_loss")
# grad_g.symbol.list_output_names()
# >> ['loss1_grad_0_output', 'grad_sum_output']
grad_g = graph_util.get_gradient_graph([total_loss, loss2], total_loss.list_input_variables())
# infer shape
in_shapes, out_shapes = graph_util.infer_shape(grad_g)
assert out_shapes == [list(shape2), list(shape1)]
# grad_data1 is elemwise_sum of grad_loss2, grad_square_loss
grad_data1 = grad_g.symbol[1]
assert grad_data1.list_attr()['num_args'] == '2'
# block grad should return zero grad
grad_data2 = grad_g.symbol[0]
assert 'zeros_like' in grad_g.ir()
# test reverse infer shape for label
assert grad_g.apply('InferShape').json_attr('shape_num_unknown_nodes') == 0
# infer type
in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
assert out_dtypes == ['float32', 'float32']
# test reverse infer type for label
assert grad_g.apply('InferType').json_attr('dtype_num_unknown_nodes') == 0
if __name__ == "__main__":
test_cnn_gradients()
test_multi_loss_graph_gradients()