Commit 86379cee by Tianqi Chen

Enable optional dependency memory and attr hint (#79)

* Enable optional dependency memory and attr hint

* fix travis
parent a1f59908
@@ -137,6 +137,18 @@ using FInplaceOption = std::function<
std::vector<std::pair<int, int> > (const NodeAttrs& attrs)>;
+/*!
+* \brief Get the list of inputs whose contents are not actually used by the operator.
+* These are dummy inputs that can be used, for example, in zeros_like and ones_like.
+*
+* \param attrs The attributes of the node
+* \return List of indices of the inputs that are not used by the operator.
+*
+* \note Register under "FIgnoreInputs".
+*/
+using FIgnoreInputs = std::function<
+std::vector<uint32_t> (const NodeAttrs& attrs)>;
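As a hedged illustration of the new attribute, the sketch below registers FIgnoreInputs on a ones_like-style operator; the registration itself is hypothetical and not part of this commit. The input is used only as a shape/dtype reference, so passes may drop the dataflow dependency on it.

#include <nnvm/op.h>
#include <nnvm/op_attr_types.h>

using namespace nnvm;

// Hypothetical registration: ones_like uses input 0 only as a shape/dtype
// reference and never reads its contents, so it can be ignored for dataflow.
NNVM_REGISTER_OP(ones_like)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<FIgnoreInputs>("FIgnoreInputs",
  [](const NodeAttrs& attrs) {
    return std::vector<uint32_t>{0};
  });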
/*!
* \brief Get the gradient node of the op node
* This function generates the backward graph of the node
* \param nodeptr The node to take gradient
......
@@ -129,6 +129,7 @@ inline Graph PlaceDevice(Graph graph,
* \param ys_out_grad The symbols for additional gradients to be propagated back to ys.
* \param aggregate_fun Aggregation function applied to aggregate the inputs.
* \param mirror_fun Optional mirror function to do mirror optimization and save memory.
+* \param attr_hint_fun Optional hint function that outputs a node behaving like src, but whose attributes are the same as those of like.
* \return A new graph, whose outputs correspond to inputs of xs.
*/
inline Graph Gradient(
@@ -137,7 +138,9 @@ inline Graph Gradient(
std::vector<NodeEntry> xs,
std::vector<NodeEntry> ys_out_grad,
std::function<NodeEntry(std::vector<NodeEntry>&& inputs)> aggregate_fun = nullptr,
-std::function<int(const Node& node)> mirror_fun = nullptr) {
+std::function<int(const Node& node)> mirror_fun = nullptr,
+std::function<NodeEntry(const NodeEntry& src, const NodeEntry& like)>
+attr_hint_fun = nullptr) {
graph.attrs["grad_ys"] = std::make_shared<any>(std::move(ys));
graph.attrs["grad_xs"] = std::make_shared<any>(std::move(xs));
@@ -145,10 +148,15 @@
if (aggregate_fun != nullptr) {
graph.attrs["grad_aggregate_fun"] = std::make_shared<any>(aggregate_fun);
}
if (mirror_fun != nullptr) {
graph.attrs["grad_mirror_fun"] = std::make_shared<any>(mirror_fun);
}
+if (attr_hint_fun != nullptr) {
+graph.attrs["attr_hint_fun"] = std::make_shared<any>(attr_hint_fun);
+}
return ApplyPass(std::move(graph), "Gradient");
}
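A caller-side sketch of the extended signature follows; the wrapper and the no-op hint are illustrative, not part of this commit. The hint receives the gradient entry as src and the corresponding forward entry as like.

#include <nnvm/graph.h>
#include <nnvm/pass_functions.h>

using namespace nnvm;

// Illustrative no-op hint: a real frontend would wrap `src` in a node whose
// shape/type inference copies the attributes of `like`.
NodeEntry NoOpAttrHint(const NodeEntry& src, const NodeEntry& like) {
  return src;
}

Graph TakeGradient(Graph fwd, std::vector<NodeEntry> ys,
                   std::vector<NodeEntry> xs,
                   std::vector<NodeEntry> ys_out_grad) {
  // aggregate_fun and mirror_fun keep their defaults; only the hint is set.
  return pass::Gradient(std::move(fwd), std::move(ys), std::move(xs),
                        std::move(ys_out_grad), nullptr, nullptr,
                        NoOpAttrHint);
}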
......
@@ -43,6 +43,7 @@ struct GradEntry {
Graph Gradient(Graph src) {
using nnvm::FGradient;
using MirrorFun = std::function<int (const Node& node)>;
+using AttrHintFun = std::function<NodeEntry (const NodeEntry& src, const NodeEntry& like)>;
CHECK_NE(src.attrs.count("grad_ys"), 0)
<< "Gradient requires grad_ys to be present.";
@@ -65,6 +66,10 @@ Graph Gradient(Graph src) {
if (src.attrs.count("grad_mirror_fun") != 0) {
mirror_fun = src.GetAttr<MirrorFun>("grad_mirror_fun");
}
+AttrHintFun attr_hint_fun = nullptr;
+if (src.attrs.count("attr_hint_fun") != 0) {
+attr_hint_fun = src.GetAttr<AttrHintFun>("attr_hint_fun");
+}
// topo sort
std::vector<NodePtr> topo_order;
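As an aside, a minimal sketch of the attribute round-trip used above (the helper function is illustrative): the type given to GetAttr<T> must exactly match the std::function type stored into graph.attrs, because the value travels through a dmlc::any.

#include <dmlc/any.h>
#include <nnvm/graph.h>
#include <functional>

using namespace nnvm;

// The stored type and the type given to GetAttr<T> must match exactly,
// since the value round-trips through a dmlc::any.
using AttrHintFun = std::function<NodeEntry (const NodeEntry& src, const NodeEntry& like)>;

void RoundTrip(Graph& g, AttrHintFun hint) {
  g.attrs["attr_hint_fun"] = std::make_shared<dmlc::any>(std::move(hint));
  const AttrHintFun& f = g.GetAttr<AttrHintFun>("attr_hint_fun");
  (void)f;  // call f(src, like) to apply the hint
}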
@@ -79,7 +84,11 @@
CHECK_EQ(ys.size(), ys_out_grad.size());
for (size_t i = 0; i < ys.size(); ++i) {
-output_grads[ys[i].node.get()][ys[i].index].grads = { ys_out_grad[i] };
+NodeEntry ograd = ys_out_grad[i];
+if (attr_hint_fun != nullptr) {
+ograd = attr_hint_fun(ograd, ys[i]);
+}
+output_grads[ys[i].node.get()][ys[i].index].grads = { ograd };
}
// construct the mirror strategy to reduce memory, if needed
@@ -105,6 +114,8 @@ Graph Gradient(Graph src) {
// traverse backward
static auto& grad_fun_map = Op::GetAttr<FGradient>("FGradient");
+static auto& finfer_shape = Op::GetAttr<FInferShape>("FInferShape");
+std::vector<NodeEntry> out_agg_grads;
for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) {
const NodePtr& ptr = *rit;
@@ -115,8 +126,17 @@
out_agg_grads.push_back(e.sum);
}
if ((*rit)->inputs.size() != 0) {
-std::vector<NodeEntry> input_grads = grad_fun_map[ptr->op()]
-(mirror_map.size() == 0 ? ptr : mirror_map.at(ptr.get()), out_agg_grads);
+NodePtr fwd_node = (mirror_map.size() == 0 ? ptr : mirror_map.at(ptr.get()));
+std::vector<NodeEntry> input_grads = grad_fun_map[ptr->op()](
+fwd_node, out_agg_grads);
+if (attr_hint_fun != nullptr) {
+// only insert a hint when the gradient op has no shape inference function registered.
+for (size_t i = 0; i < input_grads.size(); ++i) {
+if (finfer_shape.count(input_grads[i].node->op())) continue;
+input_grads[i] = attr_hint_fun(input_grads[i], fwd_node->inputs[i]);
+}
+}
CHECK_EQ((*rit)->inputs.size(), input_grads.size())
<< "Gradient function not returning enough gradients";
auto git = input_grads.begin();
......
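One plausible shape for such a hint function is an identity-style node that takes both entries as inputs and copies its inferred attributes from the second. This is a sketch under assumptions: the op name used below is hypothetical rather than defined by this commit, and its FInferShape/FInferType are assumed to be registered elsewhere.

#include <nnvm/graph.h>
#include <nnvm/node.h>
#include <nnvm/op.h>

using namespace nnvm;

// Sketch: wrap `src` in an identity-like node that takes `like` as a second
// input, assuming an op whose shape/type inference copies from input 1.
NodeEntry AttrHint(const NodeEntry& src, const NodeEntry& like) {
  NodePtr n = Node::Create();
  n->attrs.op = Op::Get("_identity_with_attr_like_rhs");  // assumed op name
  n->attrs.name = "attr_hint";
  n->inputs = {src, like};
  return NodeEntry{n, 0, 0};
}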
@@ -137,13 +137,25 @@ class GraphAllocator {
Graph PlanMemory(Graph ret) {
// setup ref counter
const IndexedGraph& idx = ret.indexed_graph();
+static auto& fignore_inputs = Op::GetAttr<FIgnoreInputs>("FIgnoreInputs");
// reference counter of each node
std::vector<uint32_t> ref_count(idx.num_node_entries(), 0);
// step 1: initialize reference count
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
-for (const auto& e : idx[nid].inputs) {
+const auto& inode = idx[nid];
+if (inode.source->is_variable()) continue;
+for (const auto& e : inode.inputs) {
++ref_count[idx.entry_id(e)];
}
+// no dataflow dependency is needed for inputs that are ignored;
+// revoke their dependency counts.
+if (fignore_inputs.count(inode.source->op()) != 0) {
+auto ignore_inputs = fignore_inputs[inode.source->op()](inode.source->attrs);
+for (uint32_t i : ignore_inputs) {
+--ref_count[idx.entry_id(inode.inputs[i])];
+}
+}
}
for (const auto& e : idx.outputs()) {
++ref_count[idx.entry_id(e)];
@@ -195,8 +207,18 @@ Graph PlanMemory(Graph ret) {
storage[eid] = allocator.Request(dev_id, dtype_vec[eid], shape_vec[eid], nid);
}
}
+// check whether certain inputs are ignored.
+std::vector<uint32_t> ignore_inputs;
+if (fignore_inputs.count(inode.source->op()) != 0) {
+ignore_inputs = fignore_inputs[inode.source->op()](inode.source->attrs);
+std::sort(ignore_inputs.begin(), ignore_inputs.end());
+}
// then free inputs
-for (const auto& e : inode.inputs) {
+for (size_t i = 0; i < inode.inputs.size(); ++i) {
+// the ref counter of an ignored input has already been decreased.
+if (std::binary_search(ignore_inputs.begin(), ignore_inputs.end(), i)) continue;
+const auto& e = inode.inputs[i];
uint32_t eid = idx.entry_id(e);
// temp_ref_count == 0 means it is taken by inplace op
if (ref_count[eid] == 0) continue;
......
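The net effect of the two PlanMemory changes above can be checked with a small self-contained sketch (an illustrative helper, not part of the pass): an ignored input contributes +1 and then -1 to its entry's reference count, so it never pins the producer's memory and is skipped again when inputs are freed.

#include <algorithm>
#include <cstdint>
#include <vector>

// Count real dataflow dependencies of one node's inputs on each entry,
// skipping the positions reported by FIgnoreInputs; this nets to the same
// result as the +1/-1 bookkeeping in the pass above.
std::vector<uint32_t> CountRealDeps(const std::vector<uint32_t>& input_eids,
                                    std::vector<uint32_t> ignore_inputs,
                                    uint32_t num_entries) {
  std::vector<uint32_t> ref_count(num_entries, 0);
  std::sort(ignore_inputs.begin(), ignore_inputs.end());
  for (uint32_t i = 0; i < input_eids.size(); ++i) {
    if (std::binary_search(ignore_inputs.begin(), ignore_inputs.end(), i)) {
      continue;  // ignored input: no dataflow dependency
    }
    ++ref_count[input_eids[i]];
  }
  return ref_count;
}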
@@ -5,11 +5,11 @@ if [ ${TRAVIS_OS_NAME} == "osx" ]; then
brew update
brew install python3
if [ ${TASK} == "python_test" ]; then
-python -m pip install nose numpy cython --user `whoami`
-python3 -m pip install nose numpy cython --user `whoami`
+python -m pip install --user nose numpy cython
+python3 -m pip install --user nose numpy cython
fi
fi
if [ ${TASK} == "lint" ]; then
-pip install cpplint 'pylint==1.4.4' 'astroid==1.3.6' --user `whoami`
+pip install --user cpplint 'pylint==1.4.4' 'astroid==1.3.6'
fi