Commit f34e1744 by yuruofeifei, committed by Tianqi Chen

[GRADIENT] Register more gradient operators (#300)

* Add conv2d max_pool backward op

* Added tests

* Fix testing

* Address comments

* Change dot to matmul

* Address comments

* Break down indicator function

* Make greater, less numpy compatible
parent 4bd92a4a
......@@ -28,6 +28,7 @@ This level enables fully connected multi-layer perceptron.
:nosignatures:
nnvm.symbol.dense
nnvm.symbol.matmul
nnvm.symbol.relu
nnvm.symbol.tanh
nnvm.symbol.sigmoid
......@@ -38,6 +39,7 @@ This level enables fully connected multi-layer perceptron.
nnvm.symbol.elemwise_sub
nnvm.symbol.elemwise_mul
nnvm.symbol.elemwise_div
nnvm.symbol.elemwise_sum
nnvm.symbol.full
nnvm.symbol.full_like
nnvm.symbol.ones
......@@ -54,6 +56,8 @@ This level enables fully connected multi-layer perceptron.
nnvm.symbol.softmax
nnvm.symbol.log_softmax
nnvm.symbol.pad
nnvm.symbol.block_grad
nnvm.symbol.indicator
**Level 2: Convolutions**
......@@ -77,6 +81,8 @@ This level enables typical convnet models.
:nosignatures:
nnvm.symbol.reshape
nnvm.symbol.reshape_like
nnvm.symbol.expand_like
nnvm.symbol.copy
nnvm.symbol.negative
nnvm.symbol.leaky_relu
......@@ -107,6 +113,7 @@ This level enables typical convnet models.
Detailed Definitions
--------------------
.. autofunction:: nnvm.symbol.dense
.. autofunction:: nnvm.symbol.matmul
.. autofunction:: nnvm.symbol.relu
.. autofunction:: nnvm.symbol.tanh
.. autofunction:: nnvm.symbol.sigmoid
......@@ -117,6 +124,7 @@ Detailed Definitions
.. autofunction:: nnvm.symbol.elemwise_sub
.. autofunction:: nnvm.symbol.elemwise_mul
.. autofunction:: nnvm.symbol.elemwise_div
.. autofunction:: nnvm.symbol.elemwise_sum
.. autofunction:: nnvm.symbol.full
.. autofunction:: nnvm.symbol.full_like
.. autofunction:: nnvm.symbol.ones
......@@ -133,6 +141,8 @@ Detailed Definitions
.. autofunction:: nnvm.symbol.softmax
.. autofunction:: nnvm.symbol.log_softmax
.. autofunction:: nnvm.symbol.pad
.. autofunction:: nnvm.symbol.block_grad
.. autofunction:: nnvm.symbol.indicator
.. autofunction:: nnvm.symbol.conv2d
.. autofunction:: nnvm.symbol.conv2d_transpose
......@@ -142,6 +152,8 @@ Detailed Definitions
.. autofunction:: nnvm.symbol.global_avg_pool2d
.. autofunction:: nnvm.symbol.reshape
.. autofunction:: nnvm.symbol.reshape_like
.. autofunction:: nnvm.symbol.expand_like
.. autofunction:: nnvm.symbol.copy
.. autofunction:: nnvm.symbol.negative
.. autofunction:: nnvm.symbol.leaky_relu
......
......@@ -62,6 +62,13 @@ enum TypeFlag {
kUint64 = 10,
};
enum IndicatorRuleFlag {
kGT0 = 0,
kLT0 = 1,
kMax = 2,
kMin = 3,
};
#define DMLC_DECLARE_DTYPE_FIELD(name) \
DMLC_DECLARE_FIELD(name) \
.add_enum("float16", kFloat16) \
......@@ -84,6 +91,28 @@ struct CastParam : public dmlc::Parameter<CastParam> {
}
};
struct IndicatorParam : public dmlc::Parameter<IndicatorParam> {
TShape axis;
bool exclude;
DMLC_DECLARE_PARAMETER(IndicatorParam) {
DMLC_DECLARE_FIELD(axis).set_default(TShape())
.describe(R"code(The axis or axes along which to perform the indicator rule.
The default, `axis=()`, will compute over all elements into a
scalar array with shape `(1,)`.
If `axis` is int, rule is applied on a particular axis.
If `axis` is a tuple of ints, rule is applied on all the axes
specified in the tuple.
If `exclude` is true, rule will be applied on the axes that are
NOT in axis instead.)code");
DMLC_DECLARE_FIELD(exclude).set_default(false)
.describe("Whether to apply rule on axis that are NOT in axis instead.");
}
};
struct ReshapeParam : public dmlc::Parameter<ReshapeParam> {
Tuple<int64_t> shape;
......@@ -97,8 +126,7 @@ struct SqueezeParam : public dmlc::Parameter<SqueezeParam> {
DMLC_DECLARE_PARAMETER(SqueezeParam) {
DMLC_DECLARE_FIELD(axis).set_default(TShape())
.describe("The axis to squeeze in the input tensor."
" If set to None, all size=1 axes will be squeezed");
.describe("The axis to squeeze in the input tensor.");
}
};
......@@ -110,6 +138,15 @@ struct ScalarParam : public dmlc::Parameter<ScalarParam> {
}
};
struct FillValueParam : public dmlc::Parameter<FillValueParam> {
double fill_value;
DMLC_DECLARE_PARAMETER(FillValueParam) {
DMLC_DECLARE_FIELD(fill_value)
.describe("Scalar value to be filled");
}
};
struct TransposeParam : public dmlc::Parameter<TransposeParam> {
TShape axes;
......@@ -158,16 +195,49 @@ struct ReduceParam : public dmlc::Parameter<ReduceParam> {
}
};
struct InitOpWithScalarParam : public dmlc::Parameter<InitOpWithScalarParam> {
TShape shape;
int dtype;
double fill_value;
DMLC_DECLARE_PARAMETER(InitOpWithScalarParam) {
DMLC_DECLARE_FIELD(shape).set_default(TShape());
DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32)
.describe("Target data type.");
DMLC_DECLARE_FIELD(fill_value).describe("Scalar value to fill");
}
};
struct InitOpParam : public dmlc::Parameter<InitOpParam> {
TShape shape;
int dtype;
double value;
DMLC_DECLARE_PARAMETER(InitOpParam) {
DMLC_DECLARE_FIELD(shape).set_default(TShape());
DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32)
.describe("Target data type.");
DMLC_DECLARE_FIELD(value).describe("Value to fill");
}
};
struct ElementWiseReduceParam : public dmlc::Parameter<ElementWiseReduceParam> {
int num_args;
DMLC_DECLARE_PARAMETER(ElementWiseReduceParam) {
DMLC_DECLARE_FIELD(num_args).set_lower_bound(1)
.describe("Number of inputs to be reduced.");
}
};
struct MatMulParam : public dmlc::Parameter<MatMulParam> {
bool transpose_a;
bool transpose_b;
DMLC_DECLARE_PARAMETER(MatMulParam) {
DMLC_DECLARE_FIELD(transpose_a)
.describe("If true then transpose the first input before dot.")
.set_default(false);
DMLC_DECLARE_FIELD(transpose_b)
.describe("If true then transpose the second input before dot.")
.set_default(false);
}
};
......
......@@ -188,7 +188,7 @@ def build(graph, target=None, shape=None, dtype="float32", params=None, target_h
The input types to the graph
params : dict of str to NDArray
Input parameetrs to the graph that do not change
Input parameters to the graph that do not change
during inference time. Used for pre-compute
folding optimization.
......
......@@ -5,6 +5,9 @@ from __future__ import absolute_import as _abs
import tvm
from . import graph_attr
from ..graph import create
from ..symbol import Group, ones_like
def infer_shape(graph, **shape):
"""Infer the shape given the shape of inputs.
......@@ -89,3 +92,57 @@ def check_graph_equal(grapha, graphb, compare_variable_attrs=False):
err = _deep_compare(grapha, graphb, compare_variable_attrs)
if err:
raise ValueError("Graph compare error: " + err)
def get_gradient_graph(ys, xs, grad_ys=None):
"""Create gradient graph of ys with respect to xs.
Parameters
----------
ys : Symbol or list of Symbol
Symbols from which the gradient is calculated.
xs : Symbol or list of Symbol
Symbols the gradient respect to.
For group symbol, gradients for all outputs will be calculated.
grad_ys : Symbol or list of Symbol
Head gradients for ys.
Returns
-------
ret : Graph
Generated gradient graph.
"""
if isinstance(ys, list):
ys = Group(ys)
g = create(ys)
g._set_symbol_list_attr('grad_ys', ys)
g._set_symbol_list_attr('grad_xs', xs)
ny = len(ys.list_output_names())
if grad_ys is None:
grad_ys = [ones_like(ys[i]) for i in range(ny)]
g._set_symbol_list_attr('grad_ys_out_grad', grad_ys)
return g.apply('Gradient')
def gradients(ys, xs, grad_ys=None):
"""Create gradient symbol of ys respect to xs.
Parameters
----------
ys : Symbol or list of Symbol
Symbols from which the gradient is calculated.
xs : Symbol or list of Symbol
Symbols the gradient respect to.
For group symbol, gradients for all outputs will be calculated.
grad_ys : Symbol or list of Symbol
Head gradients for ys.
Returns
-------
ret : list of Symbol
Generated gradient symbol. For each xs,
all gradients from ys are merged into a single symbol.
"""
grad_g = get_gradient_graph(ys, xs, grad_ys)
nx = len(Group(xs).list_output_names()) \
if isinstance(xs, list) else len(xs.list_output_names())
ret = [grad_g.symbol[i] for i in range(nx)]
return ret
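A minimal usage sketch of the two helpers added above, assuming the nnvm Python package from this tree is importable; the symbols and the expression are illustrative only:

import nnvm.symbol as sym
from nnvm.compiler import graph_util

x = sym.Variable("x")
y = sym.Variable("y")
z = sym.elemwise_add(x, sym.sqrt(y))

# One merged gradient symbol is returned per entry in xs.
grads = graph_util.gradients(z, [x, y])
assert len(grads) == 2

# The underlying gradient graph is also available for inspection.
grad_graph = graph_util.get_gradient_graph(z, [x, y])
print(grad_graph.ir())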
......@@ -13,7 +13,6 @@ from ._base import c_array, c_str, nn_uint, py_str, string_types
from ._base import GraphHandle, SymbolHandle
from ._base import check_call
from .symbol import Variable, Symbol, Group as _Group
from .symbol import ones_like
class GraphIndex(object):
"""Index for quickly accessing graph attributes.
......@@ -271,38 +270,3 @@ def create(symbol):
check_call(_LIB.NNGraphCreate(
symbol.handle, ctypes.byref(ghandle)))
return Graph(ghandle)
def gradients(ys, xs, grad_ys=None):
"""Create gradient symbol of ys respect to xs.
Parameters
----------
ys : Symbol or list of Symbol
Symbols from which the gradient is calculated.
xs : Symbol or list of Symbol
Symbols the gradient respect to.
For group symbol, gradients for all outputs will be calculated.
grad_ys : Symbol or list of Symbol
Head gradients for ys.
Returns
-------
ret : list of Symbol
Generated gradient symbol. For each xs,
all gradients from ys are merged into a single symbol.
"""
if isinstance(ys, list):
ys = _Group(ys)
g = create(ys)
g._set_symbol_list_attr('grad_ys', ys)
g._set_symbol_list_attr('grad_xs', xs)
ny = len(ys.list_output_names())
if grad_ys is None:
grad_ys = [ones_like(ys[i]) for i in range(ny)]
g._set_symbol_list_attr('grad_ys_out_grad', grad_ys)
sym = g.apply('Gradient').symbol
nx = len(_Group(xs).list_output_names()) \
if isinstance(xs, list) else len(xs.list_output_names())
ret = [sym[i] for i in range(nx)]
return ret
......@@ -14,18 +14,23 @@ namespace pass {
namespace {
// default aggregate gradient function
// require operator __zero__ and __ewise_sum__ to be presented.
// require operator zeros and elemwise_sum to be presented.
NodeEntry DefaultAggregateGradient(std::vector<NodeEntry>&& v) {
if (v.size() == 1) {
return std::move(v[0]);
} else if (v.size() == 0) {
NodePtr zero_node = Node::Create();
zero_node->attrs.op = Op::Get("_zeros");
zero_node->attrs.op = Op::Get("zeros");
zero_node->attrs.name = "zero_grad";
zero_node->attrs.op->attr_parser(&(zero_node->attrs));
return NodeEntry{zero_node, 0, 0};
} else {
NodePtr sum_node = Node::Create();
sum_node->attrs.op = Op::Get("elemwise_sum");
sum_node->inputs = std::move(v);
sum_node->attrs.name = "grad_sum";
sum_node->attrs.dict["num_args"] = std::to_string(sum_node->inputs.size());
sum_node->attrs.op->attr_parser(&(sum_node->attrs));
return NodeEntry{sum_node, 0, 0};
}
}
......
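The aggregation rule in DefaultAggregateGradient above can be read as the following NumPy sketch (a hypothetical stand-alone function, not part of the pass): no contributions yield zeros, a single contribution passes through, and several contributions are summed element-wise, which is what the elemwise_sum node does.

import numpy as np

def aggregate_gradients(grads, like):
    # No contributions -> a zeros tensor of the right shape,
    # one contribution  -> returned unchanged,
    # several           -> summed element-wise.
    if len(grads) == 0:
        return np.zeros_like(like)
    if len(grads) == 1:
        return grads[0]
    return np.sum(np.stack(grads, axis=0), axis=0)

like = np.ones((2, 2), dtype=np.float32)
print(aggregate_gradients([], like))
print(aggregate_gradients([3.0 * like], like))
print(aggregate_gradients([like, 2.0 * like], like))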
......@@ -84,6 +84,22 @@ inline bool ElemwiseType(const NodeAttrs& attrs,
attrs, in_attrs, out_attrs, -1);
}
inline bool ElementWiseReduceShape(const NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<TShape, shape_is_none, shape_assign, true, shape_string>(
attrs, in_attrs, out_attrs, TShape());
}
inline bool ElementWiseReduceType(const NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<int, type_is_none, type_assign, true, type_string>(
attrs, in_attrs, out_attrs, -1);
}
#define NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(1) \
......@@ -100,11 +116,13 @@ inline bool ElemwiseType(const NodeAttrs& attrs,
#define NNVM_REGISTER_INIT_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(0) \
.set_num_outputs(1) \
.set_attr_parser(ParamParser<InitOpParam>) \
.add_arguments(InitOpParam::__FIELDS__()) \
.set_attr<FInferShape>("FInferShape", ZeroShape) \
.set_attr<FInferType>("FInferType", ZeroType)
.set_num_outputs(1)
#define NNVM_REGISTER_INIT_LIKE_OP(name) \
NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \
.set_attr<FGradient>("FGradient", MakeZeroGradNodes) \
.add_argument("data", "Symbol", "The input")
#define NNVM_REGISTER_ELEMWISE_BINARY_OP(name) \
......@@ -120,6 +138,41 @@ inline bool ElemwiseType(const NodeAttrs& attrs,
.add_argument("lhs", "Tensor", "first input") \
.add_argument("rhs", "Tensor", "second input")
#define NNVM_REGISTER_ELEMWISE_REDUCE_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs([](const NodeAttrs& attrs) { \
return static_cast<uint32_t>( \
dmlc::get<ElementWiseReduceParam>(attrs.parsed).num_args); \
}) \
.set_attr_parser(ParamParser<ElementWiseReduceParam>) \
.set_attr<FGetAttrDict>("FGetAttrDict", \
ParamGetAttrDict<ElementWiseReduceParam>) \
.set_attr<nnvm::FInferShape>("FInferShape", \
ElementWiseReduceShape) \
.set_attr<nnvm::FInferType>("FInferType", ElementWiseReduceType) \
.add_argument("args", "Symbol[]", "Positional input arguments")
#define NNVM_REGISTER_INDICATOR_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_outputs(1) \
.set_attr<FInferType>( \
"FInferType", [](const NodeAttrs& attrs, \
std::vector<int>* in_attrs, \
std::vector<int>* out_attrs) { \
CHECK_EQ(out_attrs->size(), 1U); \
NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, \
static_cast<int>(kFloat32)); \
return true; \
}) \
.set_attr<FGradient>( \
"FGradient", [](const NodePtr& n, \
const std::vector<NodeEntry>& ograds) { \
return MakeZeroGradNodes(n, ograds); \
})
} // namespace top
} // namespace nnvm
#endif // NNVM_TOP_ELEMWISE_OP_COMMON_H_
......@@ -120,7 +120,42 @@ a bias vector is created and added to the outputs.
.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
.set_num_outputs(1)
.set_num_inputs(UseBiasNumInputs<Conv2DParam>)
.set_support_level(2);
.set_support_level(2)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return MakeGradNode("_conv2d_grad", n,
{ograds[0], n->inputs[Conv2DParam::kData],
n->inputs[Conv2DParam::kWeight]},
n->attrs.dict);
});
NNVM_REGISTER_OP(_conv2d_grad)
.describe(R"code(2D convolution grad.
)code" NNVM_ADD_FILELINE)
.add_argument("ograd", "4D Tensor", "Output grad.")
.add_argument("data", "4D Tensor", "Input data of conv2d.")
.add_argument("weight", "4D Tensor", "Input weight.")
.set_num_inputs(3)
.set_num_outputs(UseBiasNumInputs<Conv2DParam>)
.set_attr<FListOutputNames>("FListOutputNames", UseBiasListInputNames<Conv2DParam>)
.set_attr_parser(ParamParser<Conv2DParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Conv2DParam>)
.set_attr<FInferShape>(
"FInferShape", [](const nnvm::NodeAttrs& attrs,
std::vector<TShape>* in_attrs,
std::vector<TShape>* out_attrs) {
const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kData, in_attrs->at(1));
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kWeight, in_attrs->at(2));
if (param.use_bias) {
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kBias, TShape({param.channels}));
}
return true;
})
.set_attr<FInferType>("FInferType", ElemwiseType<3, -1>)
.set_attr<TIsBackward>("TIsBackward", true);
DMLC_REGISTER_PARAMETER(Conv2DTransposeParam);
......
......@@ -54,7 +54,7 @@ NNVM_REGISTER_OP(dense)
- **data**: `(x1, x2, ..., xn, input_dim)`
- **weight**: `(units, input_dim)`
- **bias**: `(units,)`
- **out**: `(x1, x2, ..., xn, num_hidden)`
- **out**: `(x1, x2, ..., xn, units)`
The learnable parameters include both ``weight`` and ``bias``.
......@@ -72,6 +72,34 @@ If ``use_bias`` is set to be false, then the ``bias`` term is ignored.
.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<DenseParam>)
.set_attr<FInferShape>("FInferShape", DenseInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
const DenseParam& param = nnvm::get<DenseParam>(n->attrs.parsed);
NodeEntry data_grad = MakeNode("matmul",
n->attrs.name + "_data_grad",
{ograds[0], n->inputs[DenseParam::kWeight]});
NodeEntry w_grad_sub = MakeNode("matmul",
n->attrs.name + "_weight_grad_sub0",
{ograds[0], n->inputs[DenseParam::kData]},
{{"transpose_a", "true"}});
TShape w_reduce_axis = {0, -1};
std::ostringstream w_oss; w_oss << w_reduce_axis;
NodeEntry w_grad = MakeNode("sum", n->attrs.name + "_weight_grad",
{w_grad_sub},
{{"axis", w_oss.str()}, {"exclude", "true"}});
std::vector<NodeEntry> grads = {data_grad, w_grad};
if (param.use_bias) {
TShape axis = {-1};
std::ostringstream b_oss; b_oss << axis;
grads.push_back(MakeNode("sum", n->attrs.name + "_bias_grad",
{ograds[0]},
{{"axis", b_oss.str()}, {"exclude", "true"}}));
}
return grads;
})
.set_support_level(1);
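A NumPy sanity check of the dense gradient wiring registered above, for the plain 2-D case with illustrative shapes: data_grad = ograd . W, weight_grad = ograd^T . X, and bias_grad sums ograd over the batch axis.

import numpy as np

rng = np.random.default_rng(0)
n, input_dim, units = 4, 5, 3
x = rng.standard_normal((n, input_dim))        # data
w = rng.standard_normal((units, input_dim))    # weight
ograd = rng.standard_normal((n, units))        # gradient of the output

# out = x @ w.T (+ bias), so for the 2-D case:
data_grad = ograd @ w            # matmul(ograd, weight)
weight_grad = ograd.T @ x        # matmul(ograd, data, transpose_a=True)
bias_grad = ograd.sum(axis=0)    # reduce over every axis except the last

assert data_grad.shape == x.shape
assert weight_grad.shape == w.shape
assert bias_grad.shape == (units,)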
// relu
......@@ -82,6 +110,18 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(relu)
max(input, 0)
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = relu(x)
// grad = indicator(x > 0)
NodeEntry zero = MakeNode("zeros_like", n->attrs.name + "_grad_zero",
{n->inputs[0]});
return std::vector<NodeEntry>{
MakeNode("greater", n->attrs.name + "_grad",
{n->inputs[0], zero}, {{"exclude", "true"}})
};
})
.set_support_level(1);
// dropout
......@@ -217,7 +257,37 @@ NNVM_REGISTER_OP(softmax)
.set_num_outputs(1)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(1);
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// grad_x = grad_y dot jacobian of softmax
//
// jacobian of softmax
// [-y1y1 + y1, -y1y2, ... ]
// [ ... , -y2y2 + y2, ... ]
// [ ... ... ]
// [ ... ,-ynyn + yn]
//
// grad_x =
// [-y1*(ograd1*y1 - 1 + ograd2*y2 + ..., -y2*(ograd1*y1 - 1 + ograd2*y2, ..., ...]]
// grad_x = ograd elemwise_mul output
// grad_x = sum(grad_x, keepdim, axis)
// grad_x = grad_x broadcast_mul output
// grad_x = neg grad_x
// grad_x = grad_x + output
const SoftmaxParam& param = nnvm::get<SoftmaxParam>(n->attrs.parsed);
NodeEntry output = NodeEntry{n, 0, 0};
NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub0", {ograds[0], output});
NodeEntry sub1 = MakeNode("sum", n->attrs.name + "_grad_sub1", {sub0},
{{"axis", std::to_string(param.axis)}, {"keepdims", "true"}});
NodeEntry sub2 = MakeNode("broadcast_mul", n->attrs.name + "_grad_sub2", {sub1, output});
NodeEntry sub3 = MakeNode("negative", n->attrs.name + "_grad_sub3", {sub2});
return std::vector<NodeEntry> {
MakeNode("elemwise_add", n->attrs.name + "_grad", {sub3, output})
};
});
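For reference, a NumPy sketch of the textbook vector-Jacobian product that the derivation comment above is describing, checked against a finite difference. This is a sketch of the math, not a line-for-line translation of the registered nodes.

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def softmax_vjp(ograd, y, axis=-1):
    # Vector-Jacobian product for y = softmax(x):
    # grad_x = y * (ograd - sum(ograd * y, axis, keepdims=True))
    return y * (ograd - (ograd * y).sum(axis=axis, keepdims=True))

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 4))
ograd = rng.standard_normal((2, 4))
y = softmax(x)

# Finite-difference check against perturbing x[:, 0].
eps = 1e-6
d = eps * np.eye(4)[0]
num = (ograd * (softmax(x + d) - softmax(x - d)) / (2 * eps)).sum()
ana = softmax_vjp(ograd, y)[:, 0].sum()
assert np.allclose(num, ana, atol=1e-4)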
// log_softmax
NNVM_REGISTER_OP(log_softmax)
......@@ -236,6 +306,38 @@ NNVM_REGISTER_OP(log_softmax)
.set_num_outputs(1)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// grad_x = grad_y dot jacobian of softmax
//
// jacobian of softmax
// [-y1 + 1, -y2, ... ]
// [ ... , -y2 + 1, ... ]
// [ ... ... ]
// [ ... ,-yn + 1]
//
// grad_x =
// [-(ograd1*y1 - 1 + ograd2*y2 + ..., -(ograd1*y1 - 1 + ograd2*y2, ..., ...]]
// grad_x = ograd elemwise_mul output
// grad_x = sum(grad_x, keepdim, axis)
// grad_x = neg grad_x
// grad_x = grad_x + ones_like(grad_x)
// grad_x = expand_dims(grad_x, axis)
const SoftmaxParam& param = nnvm::get<SoftmaxParam>(n->attrs.parsed);
NodeEntry output = NodeEntry{n, 0, 0};
NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub0", {ograds[0], output});
NodeEntry sub1 = MakeNode("sum", n->attrs.name + "_grad_sub1", {sub0},
{{"axis", std::to_string(param.axis)}, {"keepdims", "true"}});
NodeEntry sub2 = MakeNode("negative", n->attrs.name + "_grad_sub2", {sub1});
NodeEntry sub3 = MakeNode("ones_like", n->attrs.name + "_grad_sub3", {sub2});
NodeEntry sub4 = MakeNode("elemwise_add", n->attrs.name + "_grad_sub4", {sub2, sub3});
return std::vector<NodeEntry> {
MakeNode("expand_like", n->attrs.name + "_grad", {sub4, output},
{{"axis", std::to_string(param.axis)}})
};
})
.set_support_level(1);
// leaky_relu
......@@ -255,6 +357,25 @@ NNVM_REGISTER_OP(leaky_relu)
.set_num_outputs(1)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = leaky_relu(x)
// grad = indicator(x > 0) + alpha * indicator(x < 0)
const LeakyReLUParam& param = nnvm::get<LeakyReLUParam>(n->attrs.parsed);
NodeEntry zero = MakeNode("zeros_like", n->attrs.name + "_grad_zero",
{n->inputs[0]});
NodeEntry sub0 = MakeNode("greater", n->attrs.name + "_pos_grad",
{n->inputs[0], zero}, {{"exclude", "true"}});
NodeEntry sub1 = MakeNode("less", n->attrs.name + "_neg_grad",
{n->inputs[0], zero}, {{"exclude", "true"}});
NodeEntry sub2 = MakeNode("__mul_scalar__", n->attrs.name + "_neg_mul_2",
{sub1},
{{"scalar", std::to_string(param.alpha)}});
return std::vector<NodeEntry>{
MakeNode("elemwise_add", n->attrs.name + "_add_grad", {sub0, sub2})
};
})
.set_support_level(1);
......
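The leaky_relu gradient above is composed from two indicator masks; a small NumPy sketch of the same composition (the alpha value is illustrative), where the chain rule then multiplies this mask by the incoming gradient:

import numpy as np

def leaky_relu_mask(x, alpha=0.1):
    # indicator(x > 0) + alpha * indicator(x < 0), per the comment above
    return (x > 0).astype(x.dtype) + alpha * (x < 0).astype(x.dtype)

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(leaky_relu_mask(x, alpha=0.1))   # -> [0.1 0.1 0.  1.  1. ]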
......@@ -77,8 +77,30 @@ NNVM_REGISTER_OP(max_pool2d)
.set_num_inputs(1)
.set_attr<FInferShape>("FInferShape", Pool2DInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return MakeGradNode("_max_pool2d_grad", n,
{ograds[0], n->inputs[0], NodeEntry{n, 0, 0}},
n->attrs.dict);
})
.set_support_level(2);
NNVM_REGISTER_OP(_max_pool2d_grad)
.describe(R"code(Max pooling 2D grad.
)code" NNVM_ADD_FILELINE)
.add_argument("ograd", "4D Tensor", "Output grad.")
.add_argument("input", "4D Tensor", "Input data of max_pool2d grad.")
.add_argument("output", "4D Tensor", "Output data of max_pool2d grad.")
.set_num_inputs(3)
.set_num_outputs(1)
.set_attr_parser(ParamParser<Pool2DParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Pool2DParam>)
.set_attr<FInferShape>("FInferShape", AssignOutputAttr<TShape, 1, 0>)
.set_attr<FInferType>("FInferType", ElemwiseType<3, 1>)
.set_attr<TIsBackward>("TIsBackward", true);
NNVM_REGISTER_OP(avg_pool2d)
.describe(R"code(Average pooling operation for one dimensional data.
......
......@@ -144,7 +144,7 @@ inline std::string attr_assign_error_msg(const NodeAttrs& attrs,
}
/*!
* \brief macro assign shape to out if out is unknown otherwise check consistency
* \brief macro assign shape to input if out is unknown otherwise check consistency
* Use macro so we can see the error file more clearly
* \param inputs the shape array to store the result
* \param index the index of in the array
......@@ -240,10 +240,11 @@ inline bool SameShape(const NodeAttrs& attrs,
}
// return shape from node attrs
template<typename PType>
inline bool ZeroShape(const NodeAttrs& attrs,
std::vector<TShape> *ishape,
std::vector<TShape> *oshape) {
const TShape& ts = dmlc::get<InitOpParam>(attrs.parsed).shape;
const TShape& ts = dmlc::get<PType>(attrs.parsed).shape;
if (ts.ndim() != 0) {
SHAPE_ASSIGN(oshape->at(0), ts);
return true;
......@@ -252,15 +253,63 @@ inline bool ZeroShape(const NodeAttrs& attrs,
}
}
// simply assign output shape or type from input
template<typename AttrType, int in_index, int out_index>
inline bool AssignOutputAttr(const NodeAttrs& attrs,
std::vector<AttrType> *in_attrs,
std::vector<AttrType> *out_attrs) {
CHECK_LT(in_index, in_attrs->size());
CHECK_LT(out_index, out_attrs->size());
const TShape &dshape = in_attrs->at(in_index);
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, out_index, dshape);
return true;
}
// return type from node attrs
template<typename PType>
inline bool ZeroType(const NodeAttrs& attrs,
std::vector<int> *iattr,
std::vector<int> *oattr) {
int dtype = dmlc::get<InitOpParam>(attrs.parsed).dtype;
int dtype = dmlc::get<PType>(attrs.parsed).dtype;
DTYPE_ASSIGN(oattr->at(0), dtype);
return true;
}
// Make zero grad node
inline std::vector<NodeEntry> MakeZeroGradNodes(
const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
std::vector<NodeEntry> ret;
for (uint32_t i = 0; i < n->num_inputs(); ++i) {
std::ostringstream os;
ret.push_back(MakeNode("zeros_like", n->attrs.name + "_zero_grad",
{n->inputs[i]}));
}
return ret;
}
// Helper to make gradient node
inline std::vector<NodeEntry> MakeGradNode(
const char* op_name,
const NodePtr& n,
std::vector<NodeEntry> inputs,
std::unordered_map<std::string, std::string> attr = {}) {
NodePtr p = Node::Create();
p->attrs.op = nnvm::Op::Get(op_name);
p->attrs.name = n->attrs.name + "_grad";
p->inputs = std::move(inputs);
p->attrs.dict = std::move(attr);
if (p->attrs.op->attr_parser) {
p->attrs.op->attr_parser(&p->attrs);
}
std::vector<NodeEntry> ret;
for (uint32_t i = 0; i < p->num_outputs(); ++i) {
ret.emplace_back(NodeEntry{p, i, 0});
}
return ret;
}
} // namespace top
} // namespace nnvm
......
......@@ -241,73 +241,70 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(copy)
});
DMLC_REGISTER_PARAMETER(InitOpParam);
DMLC_REGISTER_PARAMETER(InitOpWithScalarParam);
DMLC_REGISTER_PARAMETER(FillValueParam);
// full
NNVM_REGISTER_INIT_OP(full)
.describe(R"code(Fill array with scalar value
)code" NNVM_ADD_FILELINE)
.set_attr_parser(ParamParser<InitOpWithScalarParam>)
.set_attr<FGetAttrDict>(
"FGetAttrDict", ParamGetAttrDict<InitOpWithScalarParam>)
.add_arguments(InitOpWithScalarParam::__FIELDS__())
.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpWithScalarParam>)
.set_attr<FInferType>("FInferType", ZeroType<InitOpWithScalarParam>)
.set_support_level(1);
NNVM_REGISTER_INIT_OP(zeros)
.describe(R"code(Fill target with zeros
)code" NNVM_ADD_FILELINE)
.set_attr_parser(ParamParser<InitOpParam>)
.set_attr<FGetAttrDict>(
"FGetAttrDict", ParamGetAttrDict<InitOpParam>)
.add_arguments(InitOpParam::__FIELDS__())
.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpParam>)
.set_attr<FInferType>("FInferType", ZeroType<InitOpParam>)
.set_support_level(1);
NNVM_REGISTER_INIT_OP(ones)
.describe(R"code(Fill target with ones
)code" NNVM_ADD_FILELINE)
.set_attr_parser(ParamParser<InitOpParam>)
.set_attr<FGetAttrDict>(
"FGetAttrDict", ParamGetAttrDict<InitOpParam>)
.add_arguments(InitOpParam::__FIELDS__())
.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpParam>)
.set_attr<FInferType>("FInferType", ZeroType<InitOpParam>)
.set_support_level(1);
// full_like
NNVM_REGISTER_ELEMWISE_UNARY_OP(full_like)
.describe(R"code(Return an scalar value array with the same shape and type
NNVM_REGISTER_INIT_LIKE_OP(full_like)
.describe(R"code(Return an scalar value array with the same shape and type
as the input array
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.add_arguments(InitOpParam::__FIELDS__())
.set_attr_parser(ParamParser<InitOpParam>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return std::vector<NodeEntry>{
MakeNode("zeros_like", n->attrs.name + "_grad",
{n->inputs[0]})
};
});
.add_arguments(FillValueParam::__FIELDS__())
.set_attr_parser(ParamParser<FillValueParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<FillValueParam>)
.set_support_level(1);
NNVM_REGISTER_ELEMWISE_UNARY_OP(zeros_like)
NNVM_REGISTER_INIT_LIKE_OP(zeros_like)
.describe(R"code(Return an array of zeros with the same shape and type
as the input array.
)code")
.add_argument("data", "Symbol", "The input")
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return std::vector<NodeEntry>{
MakeNode("zeros_like", n->attrs.name + "_grad",
{n->inputs[0]})
};
});
.set_support_level(1);
NNVM_REGISTER_ELEMWISE_UNARY_OP(ones_like)
NNVM_REGISTER_INIT_LIKE_OP(ones_like)
.describe(R"code(Return an array of ones with the same shape and type
as the input array.
)code")
.add_argument("data", "Symbol", "The input")
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return std::vector<NodeEntry>{
MakeNode("zeros_like", n->attrs.name + "_grad",
{n->inputs[0]})
};
});
.set_support_level(1);
// unary scalar op
DMLC_REGISTER_PARAMETER(ScalarParam);
......@@ -452,64 +449,84 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__)
};
});
DMLC_REGISTER_PARAMETER(ElementWiseReduceParam);
struct ElementWiseSumParam : public dmlc::Parameter<ElementWiseSumParam> {
int num_args;
DMLC_DECLARE_PARAMETER(ElementWiseSumParam) {
DMLC_DECLARE_FIELD(num_args).set_lower_bound(1)
.describe("Number of inputs to be summed.");
}
};
DMLC_REGISTER_PARAMETER(ElementWiseSumParam);
bool ElementWiseSumShape(const NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<TShape, shape_is_none, shape_assign, true, shape_string>(
attrs, in_attrs, out_attrs, TShape());
}
bool ElementWiseSumType(const NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(out_attrs->size(), 1);
return ElemwiseAttr<int, type_is_none, type_assign, true, type_string>(
attrs, in_attrs, out_attrs, -1);
}
std::vector<NodeEntry> ElementWiseSumGrad(
const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// identity constraints in the beginning for easier shape inference.
const Op* copy_op = Op::Get("identity");
NNVM_REGISTER_ELEMWISE_REDUCE_OP(elemwise_sum)
.describe(R"code(Adds all input arguments element-wise.
)code" NNVM_ADD_FILELINE)
.set_attr<nnvm::FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
CHECK_EQ(ograds.size(), 1);
std::vector<NodeEntry> ret;
NodeEntry n_out{n, 0, 0};
for (size_t i = 0; i < n->inputs.size(); i++) {
NodePtr id_node = Node::Create();
id_node->attrs.op = copy_op;
id_node->inputs = {ograds[0]};
ret.push_back(NodeEntry{id_node, 0, 0});
ret.push_back(ograds[0]);
}
return ret;
}
});
NNVM_REGISTER_OP(elemwise_sum)
.describe(R"code(Adds all input arguments element-wise.
NNVM_REGISTER_ELEMWISE_UNARY_OP(block_grad)
.describe(R"code(Blocks gradient computation for input.
)code" NNVM_ADD_FILELINE)
.set_attr_parser(ParamParser<ElementWiseSumParam>)
.set_num_inputs([](const NodeAttrs& attrs) {
uint32_t ret = dmlc::get<ElementWiseSumParam>(attrs.parsed).num_args;
return ret;
.set_attr<nnvm::FInplaceIdentity>(
"FInplaceIdentity", [](const NodeAttrs& attrs){
return std::vector<bool>{true};
})
.set_attr<nnvm::FInferShape>("FInferShape", ElementWiseSumShape)
.set_attr<nnvm::FInferType>("FInferType", ElementWiseSumType)
.set_attr<nnvm::FGradient>("FGradient", ElementWiseSumGrad)
.add_argument("args", "Symbol[]", "Positional input arguments");
.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
DMLC_REGISTER_PARAMETER(IndicatorParam);
// indicator function
NNVM_REGISTER_INDICATOR_OP(greater)
.describe(R"code(Greater function that returns a mask tensor
with 1.0 if (left > right), otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("lhs", "Tensor", "First input")
.add_argument("rhs", "Tensor", "Second input")
.set_num_inputs(2)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
.set_support_level(1);
NNVM_REGISTER_INDICATOR_OP(less)
.describe(R"code(Less function that returns a mask tensor
with 1.0 if (left < right), otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("lhs", "Tensor", "First input")
.add_argument("rhs", "Tensor", "Second input")
.set_num_inputs(2)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
.set_support_level(1);
NNVM_REGISTER_INDICATOR_OP(_max_mask)
.describe(R"code(Function that returns a mask tensor
with 1.0 if the value is maximum over given axes, otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input")
.set_num_inputs(1)
.add_arguments(IndicatorParam::__FIELDS__())
.set_attr_parser(ParamParser<IndicatorParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<IndicatorParam>)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_support_level(1);
NNVM_REGISTER_INDICATOR_OP(_min_mask)
.describe(R"code(Function that returns a mask tensor
with 1.0 if the value is minimum over given axes, otherwise 0.0 element-wise.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input")
.set_num_inputs(1)
.add_arguments(IndicatorParam::__FIELDS__())
.set_attr_parser(ParamParser<IndicatorParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<IndicatorParam>)
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_support_level(1);
} // namespace top
} // namespace nnvm
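A rough NumPy analogue of the _max_mask indicator registered above, illustrating the axis/exclude semantics described in IndicatorParam (the function and variable names are hypothetical):

import numpy as np

def max_mask(x, axis=(), exclude=False):
    # 1.0 where the element equals the maximum over the reduced axes, else 0.0.
    # axis=() reduces over all elements; exclude=True reduces over the
    # complement of the given axes, mirroring IndicatorParam.
    axes = tuple(range(x.ndim)) if len(axis) == 0 else tuple(axis)
    if exclude:
        axes = tuple(i for i in range(x.ndim) if i not in axes)
    m = x.max(axis=axes, keepdims=True)
    return (x == m).astype(np.float32)

x = np.array([[1.0, 3.0, 2.0],
              [4.0, 0.0, 4.0]])
print(max_mask(x, axis=(1,)))                # per-row maxima
print(max_mask(x, axis=(0,), exclude=True))  # same reduction, expressed via exclude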
/*!
* Copyright (c) 2017 by Contributors
* \file matrix_op.cc
* \brief Matrix operators
*/
#include <nnvm/op.h>
#include <nnvm/node.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/top/tensor.h>
#include "../op_common.h"
#include "../elemwise_op_common.h"
namespace nnvm {
namespace top {
DMLC_REGISTER_PARAMETER(MatMulParam);
inline bool DotShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
const MatMulParam& param = nnvm::get<MatMulParam>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 2U);
CHECK_EQ(out_attrs->size(), 1U);
TShape lshape = (*in_attrs)[0];
TShape rshape = (*in_attrs)[1];
if (lshape.ndim() == 1) lshape = TShape{1, lshape[0]};
if (rshape.ndim() == 1) rshape = TShape{1, rshape[0]};
if (param.transpose_a) std::reverse(lshape.begin(), lshape.end());
if (param.transpose_b) std::reverse(rshape.begin(), rshape.end());
CHECK_EQ(lshape[lshape.ndim() - 1], rshape[0])
<< "dot shape inconsistent: " << lshape << " X " << rshape;
TShape oshape(lshape.ndim() + rshape.ndim() - 1);
for (int i = 0; i < lshape.ndim() - 1; i++) oshape[i] = lshape[i];
for (int i = 1; i < rshape.ndim(); i++) oshape[i + lshape.ndim() - 1] = rshape[i];
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape);
return true;
}
NNVM_REGISTER_OP(matmul)
.describe(R"doc(Matrix multiplication of two arrays.
``dot``'s behavior depends on the input array dimensions:
- 1-D arrays: inner product of vectors
- 2-D arrays: matrix multiplication
- N-D arrays: a sum product over the last axis of the first input and the first
axis of the second input
For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the
result array will have shape `(n,m,r,s)`. It is computed by::
dot(x,y) = sum(x[i,j,:]*y[:,a,b])
)doc" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr_parser(ParamParser<MatMulParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MatMulParam>)
.add_arguments(MatMulParam::__FIELDS__())
.add_argument("lhs", "NDArray-or-Symbol", "The first input")
.add_argument("rhs", "NDArray-or-Symbol", "The second input")
.set_attr<FInferShape>("FInferShape", DotShape)
.set_attr<FInferType>("FInferType", ElemwiseType<2, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// z = x dot y
// xshape (n,m,k), yshape (k,r,s)
const MatMulParam& param = nnvm::get<MatMulParam>(n->attrs.parsed);
bool Ta = param.transpose_a;
bool Tb = param.transpose_b;
// Ta = false, Tb = false
// grad_x = grad_z dot y.T
// grad_y = x.T dot grad_z
if (!Ta && !Tb) {
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{ograds[0], n->inputs[1]},
{{"transpose_a", "false"},
{"transpose_b", "true"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{n->inputs[0], ograds[0]},
{{"transpose_a", "true"},
{"transpose_b", "false"}})
};
} else if (Ta && !Tb) {
// Ta = true, Tb = false
// grad_x = y dot grad_z.T
// grad_y = x dot grad_z
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{n->inputs[1], ograds[0]},
{{"transpose_a", "false"},
{"transpose_b", "true"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{n->inputs[0], ograds[0]},
{{"transpose_a", "false"},
{"transpose_b", "false"}})
};
} else if (!Ta && Tb) {
// Ta = false, Tb = true
// grad_x = grad_z dot y
// grad_y = grad_z.T dot x
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{ograds[0], n->inputs[1]},
{{"transpose_a", "false"},
{"transpose_b", "false"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{ograds[0], n->inputs[0]},
{{"transpose_a", "true"},
{"transpose_b", "false"}})
};
} else {
// Ta = true, Tb = true
// grad_x = y.T dot grad_z.T
// grad_y = grad_z.T dot x.T
return std::vector<NodeEntry>{
MakeNode("matmul", n->attrs.name + "_grad_0",
{n->inputs[1], ograds[0]},
{{"transpose_a", "true"},
{"transpose_b", "true"}}),
MakeNode("matmul", n->attrs.name + "_grad_1",
{ograds[0], n->inputs[0]},
{{"transpose_a", "true"},
{"transpose_b", "true"}})
};
}
});
} // namespace top
} // namespace nnvm
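A NumPy check of the transpose cases handled in the matmul gradient above, for 2-D inputs with illustrative shapes:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((3, 4))
y = rng.standard_normal((4, 5))
gz = rng.standard_normal((3, 5))      # incoming gradient of z = x @ y

# Ta = False, Tb = False: grad_x = gz @ y.T, grad_y = x.T @ gz
gx, gy = gz @ y.T, x.T @ gz
assert gx.shape == x.shape and gy.shape == y.shape

# Ta = True: z = x.T @ y with x of shape (4, 3),
# so grad_x = y @ gz.T and grad_y = x @ gz, as in the second branch above.
xT = rng.standard_normal((4, 3))
gzT = rng.standard_normal((3, 5))
gx_T = y @ gzT.T
gy_T = xT @ gzT
assert gx_T.shape == xT.shape and gy_T.shape == y.shape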
......@@ -31,11 +31,19 @@ inline TShape ReduceShapeImpl(const TShape& ishape,
<< "Reduction axis " << axis[axis.ndim() - 1]
<< " Exceeds input dimensions " << ishape;
TShape in_axis = axis;
for (auto& i : in_axis) {
i = i < 0 ? i + ishape.ndim(): i;
CHECK_GE(i, 0) << "axis out of bounds in reduce operator";
CHECK_LT(i, ishape.ndim()) << "axis out of bounds in reduce operator";
}
std::sort(in_axis.begin(), in_axis.end());
if (keepdims) {
TShape oshape(ishape);
if (exclude) {
for (dim_t i = 0, j = 0; i < ishape.ndim(); ++i) {
if (j < axis.ndim() && i == axis[j]) {
if (j < in_axis.ndim() && i == in_axis[j]) {
++j;
continue;
}
......@@ -44,22 +52,22 @@ inline TShape ReduceShapeImpl(const TShape& ishape,
return oshape;
}
for (dim_t i = 0; i < axis.ndim(); ++i) {
oshape[axis[i]] = 1;
for (dim_t i = 0; i < in_axis.ndim(); ++i) {
oshape[in_axis[i]] = 1;
}
return oshape;
}
if (exclude) {
TShape oshape = TShape(axis.ndim());
for (dim_t i = 0; i < axis.ndim(); ++i) {
oshape[i] = ishape[axis[i]];
TShape oshape = TShape(in_axis.ndim());
for (dim_t i = 0; i < in_axis.ndim(); ++i) {
oshape[i] = ishape[in_axis[i]];
}
return oshape;
}
TShape oshape = TShape(std::max<dim_t>(1, ishape.ndim() - axis.ndim()));
TShape oshape = TShape(std::max<dim_t>(1, ishape.ndim() - in_axis.ndim()));
for (dim_t i = 0, j = 0, k = 0; i < ishape.ndim(); ++i) {
if (j < axis.ndim() && i == axis[j]) {
if (j < in_axis.ndim() && i == in_axis[j]) {
++j;
continue;
}
......@@ -99,9 +107,7 @@ inline void AxesParamParser(nnvm::NodeAttrs* attrs) {
.set_attr<FInferShape>("FInferShape", ReduceShape) \
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_num_inputs(1) \
.set_num_outputs(1) \
.set_num_outputs(1)
NNVM_REGISTER_REDUCE_OP(sum)
.describe(R"code(Computes the sum of array elements over given axes.
......@@ -120,17 +126,66 @@ Example::
sum(data, axis=[1,2])
[ 12. 19. 27.]
)code" NNVM_ADD_FILELINE);
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis; axis << param.axis;
return std::vector<NodeEntry>{
MakeNode("expand_like", n->attrs.name + "_grad",
{ograds[0], n->inputs[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}})
};
});
NNVM_REGISTER_REDUCE_OP(max)
.describe(R"code(Computes the max of array elements over given axes.
)code" NNVM_ADD_FILELINE);
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis; axis << param.axis;
NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0",
{ograds[0], n->inputs[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}});
NodeEntry sub1 = MakeNode("_max_mask", n->attrs.name + "_grad_sub1",
{ograds[0]},
{{"axis", axis.str()},
{"exclude", std::to_string(param.exclude)}});
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1})
};
});
NNVM_REGISTER_REDUCE_OP(min)
.describe(R"code(Computes the min of array elements over given axes.
)code" NNVM_ADD_FILELINE);
)code" NNVM_ADD_FILELINE)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis; axis << param.axis;
NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0",
{ograds[0], n->inputs[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}});
NodeEntry sub1 = MakeNode("_min_mask", n->attrs.name + "_grad_sub1",
{ograds[0]},
{{"axis", axis.str()},
{"exclude", std::to_string(param.exclude)}});
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1})
};
});
} // namespace top
......
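The sum/max/min gradients registered above broadcast the incoming gradient back to the input shape (the role of expand_like) and, for max/min, additionally mask it to the extremal positions (the role of _max_mask/_min_mask). A NumPy sketch of that idea for a keepdims reduction over one axis, with illustrative names and shapes:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3, 4))

# forward: y = sum(x, axis=1, keepdims=True); backward: broadcast ograd back
ograd = rng.standard_normal((2, 1, 4))
sum_grad = np.broadcast_to(ograd, x.shape)               # expand_like

# max over axis=1: gradient flows only to the maximal positions
m = x.max(axis=1, keepdims=True)
max_grad = np.broadcast_to(ograd, x.shape) * (x == m)    # expand_like * max mask

assert sum_grad.shape == x.shape and max_grad.shape == x.shape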
......@@ -58,6 +58,12 @@ Example::
.set_attr<FInferShape>("FInferShape", FlattenInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.add_argument("data", "Tensor", "Input data.")
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return MakeGradNode("reshape_like", n,
{ograds[0], n->inputs[0]});
})
.set_support_level(1);
// concatenate
......@@ -172,8 +178,8 @@ inline bool ExpandDimsInferShape(const NodeAttrs& attrs,
NNVM_REGISTER_OP(expand_dims)
.describe(R"code(Inserts a new axis of size 1 into the array shape
For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)``
will return a new array with shape ``(2,1,3,4)``.
For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1, num_newaxis=5)``
will return a new array with shape ``(2,5,3,4)``.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input tensor")
......@@ -184,6 +190,61 @@ will return a new array with shape ``(2,1,3,4)``.
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
const ExpandDimsParam& param = nnvm::get<ExpandDimsParam>(n->attrs.parsed);
return std::vector<NodeEntry> {
MakeNode("sum", n->attrs.name + "_grad", {ograds[0]},
{{"axis", std::to_string(param.axis)}})
};
})
.set_support_level(1);
NNVM_REGISTER_OP(expand_like)
.describe(R"code(Expand an input array with the shape of second array.
This operation can always be composed of unsqueezing and expanding dims.
Examples::
input = [ 12. 19. 27.]
input.shape = (3,)
new_shape_array = [[[1,2],[2,3],[1,3]],
[[1,4],[4,3],[5,2]],
[[7,1],[7,2],[7,3]]]
new_shape_array.shape = (3, 3, 2)
expand_like(input, [1,2], new_shape_array) =
[[[12,12],[12,12],[12,12]],
[[19,19],[19,19],[19,19]],
[[27,27],[27,27],[27,27]]]
)code" NNVM_ADD_FILELINE)
.add_argument("input", "Tensor", "Source input")
.add_argument("shape_like", "Tensor", "Input with new shape")
.add_arguments(ReduceParam::__FIELDS__())
.set_attr_parser(ParamParser<ReduceParam>)
.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ReduceParam>)
.set_attr<nnvm::FInferShape>("FInferShape", AssignOutputAttr<TShape, 1, 0>)
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
std::ostringstream axis;
axis << param.axis;
return std::vector<NodeEntry>{
MakeNode("sum", n->attrs.name + "_grad",
{ograds[0]},
{{"axis", axis.str()},
{"keepdims", std::to_string(param.keepdims)},
{"exclude", std::to_string(param.exclude)}})
};
})
.set_support_level(1);
// split
......@@ -383,7 +444,7 @@ NNVM_REGISTER_OP(reshape)
.describe(R"code(Reshapes the input array.
Given an array and a shape, this function returns a copy of the array in the new shape.
The shape is a tuple of integers such as (2,3,4).The size of the new shape should be same as the size of the input array.
The shape is a tuple of integers such as (2,3,4). The size of the new shape should be same as the size of the input array.
Example::
......@@ -443,6 +504,46 @@ The significance of each is explained below:
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return std::vector<NodeEntry>{
MakeNode("reshape_like", n->attrs.name + "_grad",
{ograds[0], n->inputs[0]})
};
})
.set_support_level(3);
NNVM_REGISTER_OP(reshape_like)
.describe(R"code(Reshapes the input array by the size of another array.
For an input array with shape ``(d1, d2, ..., dk)``, `reshape_like` operation reshapes
the input array into an output array with the same shape as the second input array.
.. note::
Sizes for both array should be compatible.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input data.")
.add_argument("shape_like", "Tensor", "Input data.")
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FInferShape>(
"FInferShape", [](const NodeAttrs& attrs,
std::vector<TShape>* in_attrs,
std::vector<TShape>* out_attrs) {
CHECK_EQ(in_attrs->at(0).Size(), in_attrs->at(1).Size())
<< "Reshape inputs size should be compatible";
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, in_attrs->at(1));
return true;
})
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return MakeGradNode("reshape_like", n,
{ograds[0], n->inputs[0]});
})
.set_support_level(3);
// squeeze
......@@ -502,12 +603,14 @@ NNVM_REGISTER_OP(squeeze)
Examples::
x = [[[0], [1], [2]]]
x.shape = (1, 3, 1)
squeeze(x) = [0, 1, 2]
squeeze(x, 0) = [[0], [1], [2]]
squeeze(x, (0, 2)) = [0, 1, 2]
)code" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Source input")
.add_arguments(SqueezeParam::__FIELDS__())
......@@ -517,6 +620,13 @@ Examples::
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
return std::vector<NodeEntry>{
MakeNode("reshape_like", n->attrs.name + "_grad", {n->inputs[0]})
};
})
.set_support_level(1);
// tranpose
......@@ -584,7 +694,16 @@ Examples::
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_support_level(4);
.set_support_level(4)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
const TransposeParam& param = nnvm::get<TransposeParam>(n->attrs.parsed);
std::ostringstream oss; oss << param.axes;
return std::vector<NodeEntry>{
MakeNode("transpose", n->attrs.name + "_t", {ograds[0]}, {{"axes", oss.str()}})
};
});
} // namespace top
} // namespace nnvm
......@@ -13,7 +13,7 @@ TEST(Tuple, Basic) {
Tuple<int> z{1, 2, 3, 5, 6};
std::ostringstream os;
os << z;
CHECK_EQ(os.str(), "(1,2,3,5,6)");
CHECK_EQ(os.str(), "[1,2,3,5,6]");
std::istringstream is(os.str());
is >> y;
CHECK_EQ(x, y);
......
import json
import nnvm.symbol as sym
import nnvm.graph as graph
import nnvm.compiler.graph_util as graph_util
def test_json_pass():
x = sym.Variable('x')
......@@ -117,13 +118,13 @@ def test_gradient():
y = sym.Variable("y")
z1 = sym.elemwise_add(x, sym.sqrt(y))
z2 = sym.log(x)
gradient = graph.gradients([z1, z2], [x, y])
gradient = graph_util.gradients([z1, z2], [x, y])
assert len(gradient) == 2
g1 = sym.Variable("g1")
g2 = sym.Variable("g2")
grad_ys = [g1, g2]
gradient = graph.gradients(sym.Group([z1, z2]),
gradient = graph_util.gradients(sym.Group([z1, z2]),
sym.Group([x, y]), grad_ys=grad_ys)
g_graph = graph.create(sym.Group(gradient)).ir()
assert len(gradient) == 2
......
import nnvm.symbol as sym
from nnvm.compiler import graph_util
def test_cnn_gradients():
# input data
h = 128
w = 128
data_shape = (1000, 3, h, w)
data = sym.Variable('data', shape=data_shape, dtype=0)
# conv2d
num_channels = 64
kernel_size = 32
conv_w_shape = (num_channels, 3, kernel_size, kernel_size)
conv_b_shape = (num_channels,)
conv_w = sym.Variable('conv_w', shape=conv_w_shape)
conv_b = sym.Variable('conv_b', shape=conv_b_shape)
conv1 = sym.conv2d(data=data, weight=conv_w, bias=conv_b,
channels=num_channels, kernel_size=(kernel_size, kernel_size),
name='conv1')
# relu1
relu1 = sym.relu(data=conv1, name='relu1')
# max pooling
max_pooling1 = sym.max_pool2d(data=relu1, pool_size=(2, 2), name='max_pooling1')
# flatten
flatten1 = sym.flatten(data=max_pooling1)
# shape after flatten
flatten_out_shape = (h - kernel_size) * (w - kernel_size) * num_channels
# dense1
dense1_hidden_units = 100
dense1 = sym.dense(data=flatten1, name='dense1', units=dense1_hidden_units)
# relu2
relu2 = sym.relu(data=dense1, name='relu2')
# dense2
dense2_hidden_units = 10
dense2 = sym.dense(data=relu2, name='dense2', units=dense2_hidden_units)
# softmax
mlp = sym.softmax(data=dense2, name='softmax')
# fake non-sparse label
label = sym.full_like(mlp, fill_value=1)
# cross entropy loss
ce_loss = sym.sum(
sym.elemwise_mul(sym.log_softmax(dense2), label),
axis=1,
keepdims=True,
name="ce_loss")
# input variables:
# print grad_g.symbol.list_input_names()
# >> ['data', 'conv_w', 'conv_b',
# 'dense1_weight', 'dense1_bias',
# 'dense2_weight', 'dense2_bias']
# output gradient variables:
# print grad_g.symbol.list_output_names()
# >> ['conv1_grad_data', 'conv1_grad_weight', 'conv1_grad_bias',
# 'dense1_grad_weight', 'dense1_grad_bias',
# 'dense2_grad_weight', 'dense2_grad_bias']
grad_g = graph_util.get_gradient_graph(ce_loss, ce_loss.list_input_variables())
# infer shape
in_shapes, out_shapes = graph_util.infer_shape(grad_g)
# forward graph shape
assert in_shapes == [list(data_shape), list(conv_w_shape), list(conv_b_shape),
[dense1_hidden_units, flatten_out_shape], [dense1_hidden_units],
[dense2_hidden_units, dense1_hidden_units], [dense2_hidden_units]]
# input grads shape should be equal with input shape
assert in_shapes == out_shapes
# output grads w.r.t input variables
grads = graph_util.gradients(ce_loss, ce_loss.list_input_variables())
# gradients number should be equal with grad_input number
assert len(grads) == len(ce_loss.list_input_variables())
# infer type
in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
assert out_dtypes == ['float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32']
def test_multi_loss_graph_gradients():
# input data
shape1 = (1000, 100)
data1 = sym.Variable('data1', shape=(1000, 100), dtype=0)
# fake non-sparse label
label = sym.full(fill_value=3)
# square loss
sub1 = sym.elemwise_sub(data1, label, name="sub1")
square_loss = sym.sum(data=sub1**2, axis=1, name="square_loss")
# fake loss1
shape2 = (1000, )
data2 = sym.Variable('data2', shape=shape2, dtype=0)
loss1 = sym.sqrt(data2, name="loss1")
# fake loss2
loss2 = sym.relu(data1, name='loss2')
# block loss1
total_loss = sym.elemwise_sum(
sym.block_grad(loss1),
square_loss,
num_args=2,
name="total_loss")
# grad_g.symbol.list_output_names()
# >> ['loss1_grad_0_output', 'grad_sum_output']
grad_g = graph_util.get_gradient_graph([total_loss, loss2], total_loss.list_input_variables())
# infer shape
in_shapes, out_shapes = graph_util.infer_shape(grad_g)
assert out_shapes == [list(shape2), list(shape1)]
# grad_data1 is elemwise_sum of grad_loss2, grad_square_loss
grad_data1 = grad_g.symbol[1]
assert grad_data1.list_attr()['num_args'] == '2'
# block grad should return zero grad
grad_data2 = grad_g.symbol[0]
assert 'zeros_like' in grad_g.ir()
# test reverse infer shape for label
assert grad_g.apply('InferShape').json_attr('shape_num_unknown_nodes') == 0
# infer type
in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
assert out_dtypes == ['float32', 'float32']
# test reverse infer type for label
assert grad_g.apply('InferType').json_attr('dtype_num_unknown_nodes') == 0
if __name__ == "__main__":
test_cnn_gradients()
test_multi_loss_graph_gradients()