Commit 999dd1ef authored by yuruofeifei and committed by Tianqi Chen

[GRADIENT] Add backward operator to enable backward graph (#276)

* Update docs

* Add backward operator to enable backward graph

* Fix testing

* Refactor top level1 test code

* Fix format

* Test

* Added zeros ones op

* Register fill_like operator

* Fix unit test
parent e36fb360
@@ -38,6 +38,8 @@ This level enables fully connected multi-layer perceptron.
nnvm.symbol.elemwise_sub
nnvm.symbol.elemwise_mul
nnvm.symbol.elemwise_div
nnvm.symbol.fill
nnvm.symbol.fill_like
nnvm.symbol.flatten
nnvm.symbol.concatenate
nnvm.symbol.expand_dims
@@ -111,6 +113,8 @@ Detailed Definitions
.. autofunction:: nnvm.symbol.elemwise_sub
.. autofunction:: nnvm.symbol.elemwise_mul
.. autofunction:: nnvm.symbol.elemwise_div
.. autofunction:: nnvm.symbol.fill
.. autofunction:: nnvm.symbol.fill_like
.. autofunction:: nnvm.symbol.flatten
.. autofunction:: nnvm.symbol.concatenate
.. autofunction:: nnvm.symbol.expand_dims
......
@@ -62,21 +62,24 @@ enum TypeFlag {
  kUint64 = 10,
};
#define DMLC_DECLARE_DTYPE_FIELD(name) \
DMLC_DECLARE_FIELD(name) \
.add_enum("float16", kFloat16) \
.add_enum("float32", kFloat32) \
.add_enum("float64", kFloat64) \
.add_enum("uint8", kUint8) \
.add_enum("uint16", kUint16) \
.add_enum("uint32", kUint32) \
.add_enum("uint64", kUint64) \
.add_enum("int8", kInt8) \
.add_enum("int16", kInt16) \
.add_enum("int32", kInt32) \
.add_enum("int64", kInt64)
struct CastParam : public dmlc::Parameter<CastParam> {
  int dtype;
  DMLC_DECLARE_PARAMETER(CastParam) {
    DMLC_DECLARE_DTYPE_FIELD(dtype)
    .describe("Output data type.");
  }
};
@@ -155,6 +158,19 @@ struct ReduceParam : public dmlc::Parameter<ReduceParam> {
  }
};
struct InitOpParam : public dmlc::Parameter<InitOpParam> {
TShape shape;
int dtype;
double value;
DMLC_DECLARE_PARAMETER(InitOpParam) {
DMLC_DECLARE_FIELD(shape).set_default(TShape());
DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32)
.describe("Target data type.");
DMLC_DECLARE_FIELD(value).describe("Value to fill");
}
};
} // namespace top
} // namespace nnvm
......
@@ -7,7 +7,7 @@ class OpPattern(object):
    See Also
    --------
    top.tag : Contains explanation of the tag type.
    """
    # Elementwise operator
    ELEMWISE = 0
......
@@ -97,6 +97,16 @@ inline bool ElemwiseType(const NodeAttrs& attrs,
.add_argument("data", "Tensor", "The input tensor.")
#define NNVM_REGISTER_INIT_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(0) \
.set_num_outputs(1) \
.set_attr_parser(ParamParser<InitOpParam>) \
.add_arguments(InitOpParam::__FIELDS__()) \
.set_attr<FInferShape>("FInferShape", ZeroShape) \
.set_attr<FInferType>("FInferType", ZeroType)
#define NNVM_REGISTER_ELEMWISE_BINARY_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(2) \
......
@@ -8,6 +8,7 @@
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <nnvm/top/tensor.h>
#include <string>
#include <vector>
#include <unordered_set>
@@ -16,7 +17,7 @@ namespace nnvm {
namespace top {
/*!
 * \brief Parse keyword arguments as PType arguments and save to parsed
 * \tparam PType the parameter type.
 * \param attrs The attributes.
 */
template<typename PType>
@@ -202,6 +203,28 @@ inline std::string attr_assign_error_msg(const NodeAttrs& attrs,
  } \
}
/*!
 * \brief Macro to assign the rhs shape to the lhs.
 * A macro is used so the CHECK error message points at the caller's file.
 * \param lhs lhs shape
 * \param rhs rhs shape
 */
#define SHAPE_ASSIGN(lhs, rhs) \
if ((lhs).ndim() == 0) (lhs) = (rhs); \
else \
CHECK_EQ(lhs, rhs) << "shape inference inconsistent"; \
/*!
 * \brief Macro to assign the rhs type to the lhs.
 * A macro is used so the CHECK error message points at the caller's file.
 * \param lhs lhs type
 * \param rhs rhs type
 */
#define DTYPE_ASSIGN(lhs, rhs) \
if ((lhs) == -1) (lhs) = (rhs); \
else \
CHECK_EQ(lhs, rhs) << "type inference inconsistent"; \
// simply return the same shape
inline bool SameShape(const NodeAttrs& attrs,
                      std::vector<TShape> *ishape,
@@ -216,6 +239,28 @@ inline bool SameShape(const NodeAttrs& attrs,
  return true;
}
// return shape from node attrs
inline bool ZeroShape(const NodeAttrs& attrs,
std::vector<TShape> *ishape,
std::vector<TShape> *oshape) {
const TShape& ts = dmlc::get<InitOpParam>(attrs.parsed).shape;
if (ts.ndim() != 0) {
SHAPE_ASSIGN(oshape->at(0), ts);
return true;
} else {
return false;
}
}
// return type from node attrs
inline bool ZeroType(const NodeAttrs& attrs,
std::vector<int> *iattr,
std::vector<int> *oattr) {
int dtype = dmlc::get<InitOpParam>(attrs.parsed).dtype;
DTYPE_ASSIGN(oattr->at(0), dtype);
return true;
}
} // namespace top
} // namespace nnvm
......
@@ -30,7 +30,21 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(sigmoid)
Y = 1 / (1 + exp(-X))
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = 1 / (1 + exp(-n0))
// grad_0 = grad_y * y * (1 - y)
NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0",
{ograds[0], NodeEntry{n, 0, 0}});
NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1",
{NodeEntry{n, 0, 0}}, {{"scalar", "1"}});
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
{sub0, sub1})
};
});
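For reference, the identity the inline comment relies on:

\[ y = \frac{1}{1 + e^{-x}}, \qquad \frac{dy}{dx} = \frac{e^{-x}}{(1 + e^{-x})^2} = y\,(1 - y). \]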
// tanh
NNVM_REGISTER_ELEMWISE_UNARY_OP(tanh)
@@ -40,7 +54,21 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(tanh)
Y = sinh(X) / cosh(X)
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = sinh(n0) / cosh(n0)
// grad_0 = grad_y * (1 - y^2)
NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0",
{NodeEntry{n, 0, 0}, NodeEntry{n, 0, 0}});
NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1",
{sub0}, {{"scalar", "1"}});
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
{ograds[0], sub1})
};
});
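The `1 - y^2` factor in this gradient comes from:

\[ y = \tanh x, \qquad \frac{dy}{dx} = 1 - \tanh^2 x = 1 - y^2. \]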
// exp
NNVM_REGISTER_ELEMWISE_UNARY_OP(exp)
@@ -50,7 +78,17 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(exp)
exp(x)
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = exp(n0)
// grad_0 = grad_y * y
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
{ograds[0], NodeEntry{n, 0, 0}})
};
});
// log
NNVM_REGISTER_ELEMWISE_UNARY_OP(log)
@@ -60,7 +98,17 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(log)
log(x)
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = log(n0)
// grad_0 = grad_y / n0
return std::vector<NodeEntry>{
MakeNode("elemwise_div", n->attrs.name + "_grad_0",
{ograds[0], n->inputs[0]})
};
});
// sqrt
NNVM_REGISTER_ELEMWISE_UNARY_OP(sqrt)
@@ -70,7 +118,19 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(sqrt)
\sqrt(x)
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds) {
// y = sqrt(n0)
// grad_0 = grad_y / (2 * y)
NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0",
{NodeEntry{n, 0, 0}}, {{"scalar", "2"}});
return std::vector<NodeEntry>{
MakeNode("elemwise_div", n->attrs.name + "_grad_0",
{ograds[0], sub0})
};
});
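The `grad_y / (2 * y)` form uses:

\[ y = \sqrt{x}, \qquad \frac{dy}{dx} = \frac{1}{2\sqrt{x}} = \frac{1}{2y}. \]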
// binary ops

@@ -78,39 +138,132 @@ NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_add)
.describe(R"code(Element-wise add
)code")
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = n0 + n1
// grad_0 = grad_y
// grad_1 = grad_y
return std::vector<NodeEntry>{ograds[0], ograds[0]};
});
NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_sub)
.describe(R"code(Element-wise subtraction
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = n0 - n1
// grad_0 = grad_y
// grad_1 = - grad_y
return std::vector<NodeEntry>{
ograds[0],
MakeNode("negative", n->attrs.name + "_grad_1", {ograds[0]}),
};
});
NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_mul)
.describe(R"code(Element-wise multiplication
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = n0 * n1
// grad_0 = grad_y * n1
// grad_1 = grad_y * n0
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
{ograds[0], n->inputs[1]}),
MakeNode("elemwise_mul", n->attrs.name + "_grad_1",
{ograds[0], n->inputs[0]})
};
});
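The two returned entries are the product rule, as the comments note:

\[ y = n_0 \cdot n_1, \qquad \frac{\partial y}{\partial n_0} = n_1, \quad \frac{\partial y}{\partial n_1} = n_0. \]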
NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_div)
.describe(R"code(Element-wise division
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = n0 / n1
// grad_0 = grad_y / n1
// grad_1 = - grad_y * n0 / n1^2
NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0",
{ograds[0], n->inputs[0]});
NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1", {sub0});
NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2",
{n->inputs[1], n->inputs[1]});
return std::vector<NodeEntry>{
MakeNode("elemwise_div", n->attrs.name + "_grad_0",
{ograds[0], n->inputs[1]}),
MakeNode("elemwise_div", n->attrs.name + "_grad_1",
{sub1, sub2})
};
});
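Both entries follow from the quotient rule, which the sub0/sub1/sub2 nodes assemble:

\[ y = \frac{n_0}{n_1}, \qquad \frac{\partial y}{\partial n_0} = \frac{1}{n_1}, \quad \frac{\partial y}{\partial n_1} = -\frac{n_0}{n_1^2}. \]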
// negative
NNVM_REGISTER_ELEMWISE_UNARY_OP(negative)
.describe(R"code(Element-wise numeric negative
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = - n0
// grad_0 = - grad_y
return std::vector<NodeEntry>{
MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]}),
};
});
// copy
NNVM_REGISTER_ELEMWISE_UNARY_OP(copy)
.describe(R"code(Copy tensor to another one.
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = copy(n0)
// grad_0 = grad_y
return std::vector<NodeEntry>{ograds[0]};
});
DMLC_REGISTER_PARAMETER(InitOpParam);
// fill
NNVM_REGISTER_INIT_OP(fill)
.describe(R"code(Fill array with scalar value
)code" NNVM_ADD_FILELINE)
.set_support_level(1);
// fill_like
NNVM_REGISTER_ELEMWISE_UNARY_OP(fill_like)
.describe(R"code(Return an scalar value array with the same shape and type
as the input array
)code" NNVM_ADD_FILELINE)
.set_support_level(1)
.add_arguments(InitOpParam::__FIELDS__())
.set_attr_parser(ParamParser<InitOpParam>)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return std::vector<NodeEntry>{
MakeNode("fill_like", n->attrs.name + "_zero",
{n->inputs[0]}, {{"value", "0"}})
};
});
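A minimal usage sketch for the two new init ops; the keyword names are assumed to follow the InitOpParam fields (shape, dtype, value) exposed through the Python frontend, which this diff does not show directly:

import nnvm.symbol as sym

# hypothetical usage sketch (keyword names assumed from InitOpParam)
c = sym.fill(shape=(2, 3), dtype="float32", value=1.0)   # constant built from attributes only, no tensor input
x = sym.Variable("x")
z = sym.fill_like(x, value=0.0)                          # same shape/dtype as x, filled with 0.0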
// unary scalar op
DMLC_REGISTER_PARAMETER(ScalarParam);

@@ -126,49 +279,134 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__add_scalar__)
.describe(R"code(Tensor add scalar
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return std::vector<NodeEntry>{ograds[0]};
});
NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__sub_scalar__)
.describe(R"code(Tensor subtract scalar
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return std::vector<NodeEntry>{ograds[0]};
});
NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rsub_scalar__)
.describe(R"code(Scalar subtract Tensor
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
return std::vector<NodeEntry>{
MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]})
};
});
NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__mul_scalar__)
.describe(R"code(Tensor multiplies scalar
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = n0 * scalar
// grad_0 = grad_y * scalar
return std::vector<NodeEntry>{
MakeNode("__mul_scalar__", n->attrs.name + "_grad_0",
{ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}})
};
});
NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__div_scalar__)
.describe(R"code(Tensor divides scalar
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = n0 / scalar
// grad_0 = grad_y / scalar
return std::vector<NodeEntry>{
MakeNode("__div_scalar__", n->attrs.name + "_grad_0",
{ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}})
};
});
NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rdiv_scalar__)
.describe(R"code(scalar divides Tensor
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = scalar / n0
// grad_0 = - grad_y * scalar / n0^2
NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0",
{ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}});
NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1", {sub0});
NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2",
{n->inputs[0], n->inputs[0]});
return std::vector<NodeEntry>{
MakeNode("elemwise_div", n->attrs.name + "_grad_0",
{sub1, sub2})
};
});
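The sub0/sub1/sub2 chain above computes the derivative of a constant divided by the input:

\[ y = \frac{c}{x}, \qquad \frac{dy}{dx} = -\frac{c}{x^2}. \]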
NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__pow_scalar__)
.describe(R"code(Tensor power scalar
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = n0^scalar
// grad_0 = grad_y * scalar * n0^(scalar - 1)
double scalar = std::stod(n->attrs.dict["scalar"]);
NodeEntry sub0 = MakeNode("__pow_scalar__", n->attrs.name + "_grad_sub_0",
{n->inputs[0]},
{{"scalar", std::to_string(scalar - 1)}});
NodeEntry sub1 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_1",
{ograds[0]},
{{"scalar", std::to_string(scalar)}});
return std::vector<NodeEntry>{
MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
{sub0, sub1})
};
});
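The power-rule identity being implemented here:

\[ y = x^{c}, \qquad \frac{dy}{dx} = c\,x^{c-1}. \]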
NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__)
.describe(R"code(scalar power Tensor
)code" NNVM_ADD_FILELINE)
.set_support_level(3)
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
const std::vector<NodeEntry>& ograds){
// y = scalar^n0
// grad_0 = grad_y * scalar^n0 * log(scalar)
double num = std::stod(n->attrs.dict["scalar"]);
NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0",
{NodeEntry{n, 0, 0}},
{{"scalar", std::to_string(std::log(num))}});
return std::vector<NodeEntry>{
MakeNode("__mul_symbol__", n->attrs.name + "_grad_0",
{ograds[0], sub0})
};
});
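The log factor comes from differentiating a constant base:

\[ y = c^{x}, \qquad \frac{dy}{dx} = c^{x}\ln c. \]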
} // namespace top
} // namespace nnvm
@@ -6,146 +6,199 @@ import nnvm.symbol as sym
import nnvm.compiler
from nnvm.testing.config import ctx_list
def helper(symbol, inputs, dtype,
           np_forward, np_backward=None):
    ishapes = {}
    input_syms = []
    np_inputs = {}
    for (k, v) in inputs.items():
        ishapes.update({k: v[0]})
        np_inputs.update({k: np.random.uniform(size=v[0]).astype(dtype)})
        if len(v) > 1:
            input_syms.append(v[1])

    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(symbol, target, ishapes)
        m = graph_runtime.create(graph, lib, ctx)
        m.run(**np_inputs)
        y_np = np_forward(**np_inputs)
        out = m.get_output(0, tvm.nd.empty(y_np.shape, dtype))
        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)
        # backward
        if np_backward:
            graph._set_symbol_list_attr("grad_ys", symbol)
            for x in input_syms:
                graph._set_symbol_list_attr("grad_xs", x)
            graph._set_symbol_list_attr("grad_ys_out_grad", sym.Variable("head_grads"))
            graph = graph.apply("Gradient")
            ishapes.update({"head_grads": y_np.shape})
            graph, lib, _ = nnvm.compiler.build(graph, target, ishapes)
            m = graph_runtime.create(graph, lib, ctx)
            head_grads = np.random.uniform(size=y_np.shape).astype(dtype)
            y_np = head_grads * np_backward(**np_inputs)
            m.run(head_grads=head_grads, **np_inputs)
            out = m.get_output(0, tvm.nd.empty(y_np.shape, dtype))
            np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)

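For readers skimming the helper, here is the same backward-graph flow pulled out into a minimal standalone sketch; it reuses only the calls that appear in the helper itself (`_set_symbol_list_attr` and `apply("Gradient")`), which are testing-level hooks rather than a documented public API:

import nnvm
import nnvm.symbol as sym

x = sym.Variable("x")
y = sym.exp(x)

g = nnvm.graph.create(y)
# declare which outputs to differentiate, with respect to which inputs,
# and the symbol carrying the incoming head gradients, then run the pass
g._set_symbol_list_attr("grad_ys", y)
g._set_symbol_list_attr("grad_xs", x)
g._set_symbol_list_attr("grad_ys_out_grad", sym.Variable("head_grads"))
grad_graph = g.apply("Gradient")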
def test_relu():
    x = sym.Variable("x")
    y = sym.relu(sym.leaky_relu(x, alpha=0.3) - 0.2)

    def forward(x):
        x = (x < 0) * x * 0.3 + (x > 0) * x - 0.2
        return (x > 0) * x

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward)


def test_sym_scalar_pow():
    scalar = 3
    x = sym.Variable("x")
    y = x**scalar

    def forward(x):
        return x**scalar

    def backward(x):
        return scalar * x**(scalar - 1)

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward, backward)


def test_scalar_sym_pow():
    scalar = 3
    x = sym.Variable("x")
    y = scalar**x

    def forward(x):
        return scalar**x

    def backward(x):
        return np.log(scalar) * scalar**x

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward, backward)

def test_exp():
    x = sym.Variable("x")
    y = sym.exp(x)

    def forward(x):
        return np.exp(x)

    def backward(x):
        return np.exp(x)

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward, backward)

def test_log():
    x = sym.Variable("x")
    y = sym.log(x)

    def forward(x):
        return np.log(x)

    def backward(x):
        return 1. / x

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward, backward)

def test_tanh():
    x = sym.Variable("x")
    y = sym.tanh(x)

    def forward(x):
        return np.sinh(x) / np.cosh(x)

    def backward(x):
        y_np = forward(x)
        return (1 - y_np**2)

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward, backward)

def test_sigmoid():
    x = sym.Variable("x")
    y = sym.sigmoid(x)

    def forward(x):
        return 1.0 / (1.0 + np.exp(-x))

    def backward(x):
        y_np = forward(x)
        return y_np * (1 - y_np)

    dtype = "float32"
    dshape = (1, 3, 32, 32)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward, backward)

def test_softmax():
    x = sym.Variable("x")
    y = sym.softmax(x)

    def forward(x):
        return topi.testing.softmax_python(x)

    dtype = "float32"
    dshape = (10, 1000)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward)

def test_log_softmax():
    x = sym.Variable("x")
    y = sym.log_softmax(x)

    def forward(x):
        return topi.testing.log_softmax_python(x)

    dtype = "float32"
    dshape = (10, 1000)
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward)

def test_dense():
    x = sym.Variable("x")
    y = sym.dense(x, units=3, name="dense")
    y = sym.flatten(y)

    def forward(x, dense_weight, dense_bias):
        return np.dot(x, dense_weight.T) + dense_bias

    dtype = "float32"
    inputs = {
        'x': ((10, 100), x),
        'dense_weight': ((3, 100),),
        'dense_bias': ((3,),)
    }
    helper(y, inputs, dtype, forward)


def test_batchnorm():
@@ -154,27 +207,23 @@ def test_batchnorm():
    gamma = sym.Variable("gamma")
    moving_var = sym.Variable("moving_var")
    moving_mean = sym.Variable("moving_mean")
    eps = 1e-5
    y = sym.batch_norm(
        x, gamma, beta, moving_mean, moving_var, epsilon=eps)

    def forward(x, gamma, beta, moving_mean, moving_var):
        return (x - moving_mean) / np.sqrt(moving_var + eps) * gamma + beta

    dtype = "float32"
    inputs = {
        'x': ((10, 20), x),
        'gamma': ((20,),),
        'beta': ((20,),),
        'moving_mean': ((20,),),
        'moving_var': ((20,),)
    }

    helper(y, inputs, dtype, forward)


def verify_concatenate(ishape, axis):
@@ -195,6 +244,7 @@ def verify_concatenate(ishape, axis):
        out = m.get_output(0, tvm.nd.empty(out_np.shape))
        np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)


def test_concatenate():
    verify_concatenate([(2, 3, 4), (1, 3, 4)], axis=0)
    verify_concatenate([(2, 4), (2, 7)], axis=1)
@@ -215,6 +265,7 @@ def verify_split(ishape, indices_or_sections, axis):
        out = m.get_output(i, tvm.nd.empty(arr.shape))
        np.testing.assert_allclose(out.asnumpy(), arr, atol=1e-5, rtol=1e-5)


def test_split():
    verify_split((2, 3), 2, axis=0)
    verify_split((5, 3), [3], axis=0)
@@ -228,16 +279,14 @@ def verify_squeeze(dshape, axis):
    else:
        y = sym.squeeze(x)
    y = y + 1

    def forward(x):
        return np.squeeze(x, axis=axis) + 1

    dtype = "float32"
    inputs = {'x': (dshape, x)}
    helper(y, inputs, dtype, forward)


def test_squeeze():
    verify_squeeze((1, 3, 2, 5), None)
@@ -248,19 +297,15 @@ def test_squeeze():
def test_pad():
    x = sym.Variable("x")
    y = sym.pad(x, pad_width=((0, 0), (0, 0), (0, 1), (2, 3)), pad_value=1.)

    def forward(x):
        return np.pad(x,
                      pad_width=((0, 0), (0, 0), (0, 1), (2, 3)),
                      mode='constant', constant_values=1.)

    dtype = "float32"
    inputs = {'x': ((1, 3, 28, 28), x)}
    helper(y, inputs, dtype, forward)
if __name__ == "__main__":
@@ -270,6 +315,8 @@ if __name__ == "__main__":
    test_batchnorm()
    test_dense()
    test_relu()
    test_sym_scalar_pow()
    test_scalar_sym_pow()
    test_exp()
    test_log()
    test_tanh()
......