Commit 204c4442 by Tianqi Chen

[PASS] Add place device (#18)

parent cf02f5c9
export LDFLAGS = -pthread -lm
export CFLAGS = -std=c++11 -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops\
export CFLAGS = -std=c++11 -Wall -O2 -msse2 -Wno-unknown-pragmas -funroll-loops\
-Iinclude -Idmlc-core/include -fPIC
# specify tensor path
......
......@@ -20,6 +20,13 @@ interface defintion and how operators are executed.
NNVM is inspired by LLVM, aiming to be an intermediate representation library
for neural nets and computation graphs generation and optimizations.
## Why build deep learning system by parts
- Essential parts can be assembled in minimum way for embedding systems.
- Hackers can hack the parts they need and compose with other well defined parts.
- Decentralized modules enable new extensions creators to own their project
without creating a monolithic version.
## Deep learning system by parts
This is one way to divide the deep learning system into common parts.
......
......@@ -71,14 +71,8 @@ class IndexedGraph {
uint32_t node_id;
/*! \brief index of output from the source. */
uint32_t index;
/*!
* \brief compare equality
* \param other the other entry to compare
* \return whether two entries equals to each other
*/
inline bool operator==(const NodeEntry& other) const {
return node_id == other.node_id && index == other.index;
}
/*! \brief version of the node */
uint32_t version;
};
/*! \brief Node data structure in IndexedGraph */
struct Node {
......
......@@ -45,7 +45,7 @@ using ShapeVector = std::vector<TShape>;
*
* \code
* Graph g = ApplyPass(src_graph, {"InferType"});
* const DTypeVector& types = g.GetAttr<ShapeVector>("dtype");
* const DTypeVector& types = g.GetAttr<DTypeVector>("dtype");
* // get shape by entry id
* int entry_type = dtypes[g.indexed_graph().entry_id(my_entry)];
* \endcode
......@@ -54,6 +54,28 @@ using ShapeVector = std::vector<TShape>;
*/
using DTypeVector = std::vector<int>;
/*!
* \brief The result holder of device of each operator in the graph.
* \note Stored under graph.attrs["device"], provided by Pass "PlaceDevice"
*
* \code
* Graph g = ApplyPass(src_graph, {"PlaceDevice"});
 * const DeviceVector& device = g.GetAttr<DeviceVector>("device");
* // get device by node_id
* int device_type = device[g.indexed_graph().node_id(my_node)];
* \endcode
*/
using DeviceVector = std::vector<int>;
/*!
 * \brief The device assignment map from device group label to device id.
*
* \note Stored under graph.attrs["device_assign_map"], needed by Pass "PlaceDevice"
* -1 means unknown device
*/
using DeviceAssignMap = std::unordered_map<std::string, int>;
} // namespace nnvm
#endif // NNVM_GRAPH_ATTR_TYPES_H_
......@@ -91,6 +91,24 @@ inline Graph InferType(Graph graph,
return ApplyPass(std::move(graph), {"InferType"});
}
/*!
 * \brief Apply the "PlaceDevice" pass to assign a device to every operator.
 * \param graph source graph
 * \param device_group_attr_key The node attribute name used to hint the device
 *        group; nodes carrying this attribute are pinned to that group's device.
 * \param device_assign_map The assignment map from device group label to device id.
 * \param device_copy_op The name of copy op to be inserted when cross device copy happened.
 * \return A graph with new attribute "device", containing device information of each node.
 */
inline Graph PlaceDevice(Graph graph,
                         std::string device_group_attr_key,
                         DeviceAssignMap device_assign_map,
                         std::string device_copy_op) {
  // Stash the three pass arguments as graph attributes consumed by the pass body.
  graph.attrs["device_group_attr_key"] = std::make_shared<any>(std::move(device_group_attr_key));
  graph.attrs["device_assign_map"] = std::make_shared<any>(std::move(device_assign_map));
  graph.attrs["device_copy_op"] = std::make_shared<any>(std::move(device_copy_op));
  return ApplyPass(std::move(graph), {"PlaceDevice"});
}
} // namespace pass
} // namespace nnvm
#endif // NNVM_PASS_FUNCTIONS_H_
......@@ -40,7 +40,7 @@ IndexedGraph::IndexedGraph(const Graph &g) {
for (const auto& e : n->inputs) {
auto it = node2index_.find(e.node.get());
CHECK(it != node2index_.end() && it->first == e.node.get());
input_entries_.emplace_back(NodeEntry{it->second, e.index});
input_entries_.emplace_back(NodeEntry{it->second, e.index, e.version});
}
inputs_rptr.push_back(input_entries_.size());
// control deps
......
......@@ -94,6 +94,11 @@ NNVM_REGISTER_OP(exp)
.attr("inplace_pair", std::make_pair(0, 0))
.attr<FInferShape>("FInferShape", SameShape);
// Op inserted by the PlaceDevice pass to move a tensor between devices.
// Single input, and the output shape equals the input shape (SameShape).
NNVM_REGISTER_OP(cross_device_copy)
.describe("Copy data across device.")
.set_num_inputs(1)
.attr<FInferShape>("FInferShape", SameShape);
NNVM_REGISTER_OP(conv2d)
.describe("take conv of input")
......
......@@ -35,10 +35,14 @@ Graph InferAttr(Graph &&ret,
for (size_t i = 0; i < shape_args.size(); ++i) {
rshape[idx.entry_id(idx.arg_nodes()[i], 0)] = shape_args[i];
}
// erase the provided arguments
ret.attrs.erase(arg_name);
}
std::string shape_attr_key;
if (ret.attrs.count(attr_key_name) != 0) {
shape_attr_key = ret.GetAttr<std::string>(attr_key_name);
// erase the provided arguments
ret.attrs.erase(attr_key_name);
}
// temp space for shape inference.
......
/*!
* Copyright (c) 2016 by Contributors
* \file place_device.cc
 * \brief Infer the device of each operator given known information.
 *  Insert a copy node automatically when data crosses a device boundary.
*/
#include <nnvm/pass.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/graph_attr_types.h>
namespace nnvm {
namespace pass {
// Simple device placement: propagate the per-node device_group hints through
// the graph, then insert a cross-device copy node on every edge whose two
// endpoints were placed on different devices.
Graph PlaceDevice(Graph src) {
  // All three pass arguments must be supplied as graph attributes.
  CHECK_NE(src.attrs.count("device_group_attr_key"), 0)
      << "Need graph attribute \"device_group_attr_key\" in PlaceDevice";
  CHECK_NE(src.attrs.count("device_assign_map"), 0)
      << "Need graph attribute \"device_assign_map\" in PlaceDevice";
  CHECK_NE(src.attrs.count("device_copy_op"), 0)
      << "Need graph attribute \"device_copy_op\" in PlaceDevice";
  std::string device_group_attr_key = src.GetAttr<std::string>("device_group_attr_key");
  const Op* copy_op = Op::Get(src.GetAttr<std::string>("device_copy_op"));
  auto& device_assign_map = src.GetAttr<DeviceAssignMap>("device_assign_map");
  const IndexedGraph& idx = src.indexed_graph();
  DeviceVector device(idx.num_nodes(), -1);  // -1 means not yet placed
  // Forward pass: take the node's explicit group hint when present,
  // otherwise inherit the device of the first already-placed input.
  for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
    const auto& inode = idx[nid];
    auto it = inode.source->attrs.dict.find(device_group_attr_key);
    if (it != inode.source->attrs.dict.end()) {
      const std::string& device_group = it->second;
      auto dit = device_assign_map.find(device_group);
      CHECK_NE(dit, device_assign_map.end())
          << "The device assignment not found for group " << device_group;
      device[nid] = dit->second;
    } else {
      for (const IndexedGraph::NodeEntry& e : inode.inputs) {
        if (device[e.node_id] != -1) {
          device[nid] = device[e.node_id]; break;
        }
      }
    }
  }
  // Backward pass: propagate placement from consumers back to
  // still-unplaced producers.
  for (uint32_t i = idx.num_nodes(); i != 0; --i) {
    uint32_t nid = i - 1;
    const auto& inode = idx[nid];
    if (device[nid] == -1) continue;
    for (const IndexedGraph::NodeEntry& e : inode.inputs) {
      if (device[e.node_id] == -1) device[e.node_id] = device[nid];
    }
  }
  // Default any remaining unplaced node to device 0, and detect whether
  // more than one device is in use (num_dev counts device-id changes along
  // the node order; num_dev > 1 implies a mixed placement).
  int num_dev = 1, other_dev_id = -1;
  for (int& dev : device) {
    if (dev == -1) dev = 0;
    if (dev != other_dev_id) {
      if (other_dev_id != -1) ++num_dev;
      other_dev_id = dev;
    }
  }
  if (num_dev == 1) {
    // Single-device graph: no copy nodes needed, publish the result as-is.
    src.attrs.erase("device_group_attr_key");
    src.attrs.erase("device_assign_map");
    src.attrs.erase("device_copy_op");
    src.attrs["device"] = std::make_shared<any>(std::move(device));
    return src;
  }
  // (producer node_id, producer output index, consumer device) -> copy node,
  // so one copy is shared by all consumers on the same target device.
  std::map<std::tuple<uint32_t, uint32_t, int>, NodePtr> copy_map;
  // Rewritten version of each original node; nullptr if unchanged.
  std::vector<NodePtr> new_node_map(idx.num_nodes(), nullptr);
  // Device id of every node (kept or newly created) in the output graph.
  std::unordered_map<const Node*, int> new_device_map;
  // Insert copy nodes, rewriting each consumer that needs one.
  for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
    int dev_id = device[nid];
    const auto& inode = idx[nid];
    // A node must be rewritten if any input crosses devices, or if any
    // input / control dependency was itself rewritten.
    bool need_mutate = false;
    for (const IndexedGraph::NodeEntry& e : inode.inputs) {
      if (new_node_map[e.node_id] != nullptr || dev_id != device[e.node_id]) {
        need_mutate = true; break;
      }
    }
    if (!need_mutate) {
      for (const uint32_t cid : inode.control_deps) {
        if (new_node_map[cid] != nullptr) {
          need_mutate = true; break;
        }
      }
    }
    if (need_mutate) {
      NodePtr new_node = Node::Create();
      new_node->attrs = inode.source->attrs;
      new_node->inputs.reserve(inode.inputs.size());
      for (size_t i = 0; i < inode.inputs.size(); ++i) {
        const IndexedGraph::NodeEntry& e = inode.inputs[i];
        if (dev_id != device[e.node_id]) {
          // Cross-device edge: route through a (possibly shared) copy node.
          auto copy_key = std::make_tuple(e.node_id, e.index, dev_id);
          auto it = copy_map.find(copy_key);
          if (it != copy_map.end()) {
            new_node->inputs.emplace_back(
                NodeEntry{it->second, 0, 0});
          } else {
            NodePtr copy_node = Node::Create();
            copy_node->op = copy_op;
            std::ostringstream os;
            os << inode.source->inputs[i].node->attrs.name << "_" << e.index <<"_copy";
            copy_node->attrs.name = os.str();
            // BUGFIX: if the producer was itself rewritten, the copy must
            // read from the rewritten node, not the stale original one.
            if (new_node_map[e.node_id] != nullptr) {
              copy_node->inputs.emplace_back(
                  NodeEntry{new_node_map[e.node_id], e.index, 0});
            } else {
              copy_node->inputs.push_back(inode.source->inputs[i]);
            }
            copy_map[copy_key] = copy_node;
            new_device_map[copy_node.get()] = dev_id;
            new_node->inputs.emplace_back(
                NodeEntry{std::move(copy_node), 0, 0});
          }
        } else {
          // Same device: keep the edge, retargeting it if the producer
          // was rewritten.
          if (new_node_map[e.node_id] != nullptr) {
            new_node->inputs.emplace_back(
                NodeEntry{new_node_map[e.node_id], e.index, 0});
          } else {
            new_node->inputs.push_back(inode.source->inputs[i]);
          }
        }
      }
      new_node->control_deps.reserve(inode.control_deps.size());
      for (size_t i = 0; i < inode.control_deps.size(); ++i) {
        uint32_t cid = inode.control_deps[i];
        if (new_node_map[cid] != nullptr) {
          new_node->control_deps.push_back(new_node_map[cid]);
        } else {
          new_node->control_deps.push_back(inode.source->control_deps[i]);
        }
      }
      new_device_map[new_node.get()] = dev_id;
      new_node_map[nid] = std::move(new_node);
    } else {
      new_device_map[inode.source] = dev_id;
    }
  }
  // Assemble the output graph, retargeting outputs of rewritten nodes.
  Graph ret;
  for (const NodeEntry& e : src.outputs) {
    if (new_node_map[idx.node_id(e.node.get())] != nullptr) {
      ret.outputs.emplace_back(
          NodeEntry{new_node_map[idx.node_id(e.node.get())], e.index, e.version});
    } else {
      ret.outputs.emplace_back(e);
    }
  }
  // Build the device vector indexed by the NEW graph's node ids.
  const IndexedGraph& new_idx = ret.indexed_graph();
  DeviceVector new_device_vec(new_idx.num_nodes());
  for (uint32_t nid = 0; nid < new_idx.num_nodes(); ++nid) {
    if (new_device_map.count(new_idx[nid].source) == 0) {
      LOG(INFO) << "cannot find " << new_idx[nid].source->attrs.name;
    }
    new_device_vec[nid] = new_device_map.at(new_idx[nid].source);
  }
  ret.attrs["device"] = std::make_shared<any>(std::move(new_device_vec));
  return ret;
}
// Register the pass. set_change_graph(true) because copy nodes may be
// inserted; the pass provides the "device" attribute (indexed by node id)
// and consumes the three graph attributes listed below.
NNVM_REGISTER_PASS(PlaceDevice)
.describe("Infer the device type of each operator."\
"Insert a copy node when there is cross device copy")
.set_body(PlaceDevice)
.set_change_graph(true)
.provide_graph_attr("device")
.depend_graph_attr("device_group_attr_key")
.depend_graph_attr("device_assign_map")
.depend_graph_attr("device_copy_op");
// Allow DeviceAssignMap to be set from JSON under the type tag
// "dict_str_int" (used by the front end's _set_json_attr).
DMLC_JSON_ENABLE_ANY(DeviceAssignMap, dict_str_int);
} // namespace pass
} // namespace nnvm
......@@ -68,14 +68,18 @@ struct JSONNode {
writer->BeginObject();
if (node->op != nullptr) {
writer->WriteObjectKeyValue("op", node->op->name);
writer->WriteObjectKeyValue("attr", node->attrs.dict);
} else {
std::string json_null = "null";
writer->WriteObjectKeyValue("op", json_null);
}
writer->WriteObjectKeyValue("name", node->attrs.name);
if (node->attrs.dict.size() != 0) {
writer->WriteObjectKeyValue("attr", node->attrs.dict);
}
writer->WriteObjectKeyValue("inputs", inputs);
if (control_deps.size() != 0) {
writer->WriteObjectKeyValue("control_deps", control_deps);
}
writer->EndObject();
}
......
......@@ -76,6 +76,25 @@ def test_infer_type():
assert g.json_attr('dtype')[jnode_row_ptr[nindex["cast1"]]] == 1
assert g.json_attr('dtype')[jnode_row_ptr[nindex["add1"]]] == 0
def test_place_device():
    # Two device groups: "stage1" maps to device 0 and "stage2" to device 1.
    # After PlaceDevice, add2/add3 should land on device 1 and cast1 on device 0.
    x = sym.Variable('x', device_group="stage1")
    hidden = sym.add(x, x, name='add1')
    hidden = sym.cast(hidden, dtype=1, name="cast1")
    out = sym.add(hidden, hidden, device_group="stage2", name="add2")
    out = sym.add(out, sym.exp(hidden, device_group="stage2"), name="add3")

    g = graph.create(out)
    # Supply the three attributes the PlaceDevice pass depends on.
    g._set_json_attr("device_group_attr_key", "device_group")
    g._set_json_attr("device_assign_map", {"stage1": 0, "stage2" : 1}, "dict_str_int")
    g._set_json_attr("device_copy_op", "cross_device_copy")
    g = g.apply("PlaceDevice")

    jgraph = json.loads(g.apply('SaveJSON').json_attr('json'))
    row_ptr = jgraph['node_row_ptr']
    name_to_id = {node['name']: i for i, node in enumerate(jgraph['nodes'])}
    devices = g.json_attr('device')

    def device_of(name):
        # Device id recorded for the first entry of the named node.
        return devices[row_ptr[name_to_id[name]]]

    assert device_of("add2") == 1
    assert device_of("add3") == 1
    assert device_of("cast1") == 0
if __name__ == "__main__":
test_order_mutation_pass()
......@@ -83,3 +102,4 @@ if __name__ == "__main__":
test_json_pass()
test_infer_shape()
test_infer_type()
test_place_device()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment