Commit e1e91f1f by hlu1, committed by Tianqi Chen

[GraphRuntime] Debug graph runtime (#3232)

parent 29cfac6d
@@ -207,10 +207,8 @@ class DebugResult(object):
     def display_debug_result(self):
         """Displays the debugger result"
         """
-        header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Start Time", \
-                  "End Time", "Shape", "Inputs", "Outputs"]
-        lines = ["---------", "---", "--------", "-------", "----------", \
-                 "--------", "-----", "------", "-------"]
+        header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", "Inputs", "Outputs"]
+        lines = ["---------", "---", "--------", "-------", "-----", "------", "-------"]
         eid = 0
         data = []
         total_time = sum(time[0] for time in self._time_list)
@@ -223,12 +221,11 @@ class DebugResult(object):
                 continue
             name = node['name']
             shape = str(self._output_tensor_list[eid].shape)
-            time_us = round(time[0] * 1000000, 2)
-            time_percent = round(((time[0] / total_time) * 100), 2)
+            time_us = round(time[0] * 1000000, 3)
+            time_percent = round(((time[0] / total_time) * 100), 3)
             inputs = str(node['attrs']['num_inputs'])
             outputs = str(node['attrs']['num_outputs'])
-            node_data = [name, op, time_us, time_percent, str(time[1]), str(time[2]), \
-                         shape, inputs, outputs]
+            node_data = [name, op, time_us, time_percent, shape, inputs, outputs]
             data.append(node_data)
             eid += 1
         fmt = ""
...
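The "Time(us)" and "Time(%)" columns are now derived purely from the per-op
time list, so the table math is easy to check in isolation. A minimal sketch,
assuming made-up per-op timings in seconds (the `time_list` values below are
illustrative, not from this commit):

    # Each entry mimics DebugResult._time_list after this change:
    # one [seconds] value per node.
    time_list = [[0.000150], [0.000850]]
    total_time = sum(t[0] for t in time_list)
    for t in time_list:
        time_us = round(t[0] * 1000000, 3)                   # "Time(us)" column
        time_percent = round((t[0] / total_time) * 100, 3)   # "Time(%)" column
        print(time_us, time_percent)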
@@ -19,7 +19,6 @@
 import os
 import tempfile
 import shutil
-from datetime import datetime
 from tvm._ffi.base import string_types
 from tvm._ffi.function import get_global_func
 from tvm.contrib import graph_runtime
@@ -30,6 +29,7 @@ from . import debug_result
 _DUMP_ROOT_PREFIX = "tvmdbg_"
 _DUMP_PATH_PREFIX = "_tvmdbg_"
+
 def create(graph_json_str, libmod, ctx, dump_root=None):
     """Create a runtime executor module given a graph and module.
@@ -62,17 +62,23 @@ def create(graph_json_str, libmod, ctx, dump_root=None):
     try:
         fcreate = get_global_func("tvm.graph_runtime_debug.create")
     except ValueError:
-        raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
-                         "config.cmake and rebuild TVM to enable debug mode")
+        raise ValueError(
+            "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
+            "config.cmake and rebuild TVM to enable debug mode"
+        )
     ctx, num_rpc_ctx, device_type_id = graph_runtime.get_device_ctx(libmod, ctx)
     if num_rpc_ctx == len(ctx):
         libmod = rpc_base._ModuleHandle(libmod)
         try:
-            fcreate = ctx[0]._rpc_sess.get_function("tvm.graph_runtime_debug.remote_create")
+            fcreate = ctx[0]._rpc_sess.get_function(
+                "tvm.graph_runtime_debug.remote_create"
+            )
         except ValueError:
-            raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
-                             "config.cmake and rebuild TVM to enable debug mode")
+            raise ValueError(
+                "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
+                "config.cmake and rebuild TVM to enable debug mode"
+            )
     func_obj = fcreate(graph_json_str, libmod, *device_type_id)
     return GraphModuleDebug(func_obj, ctx, graph_json_str, dump_root)
@@ -100,10 +106,10 @@ class GraphModuleDebug(graph_runtime.GraphModule):
        To select which folder the outputs should be kept.
        None will make a temp folder in /tmp/tvmdbg<rand_string> and does the dumping
     """
+
     def __init__(self, module, ctx, graph_json_str, dump_root):
         self._dump_root = dump_root
         self._dump_path = None
-        self._debug_run = module["debug_run"]
         self._get_output_by_layer = module["get_output_by_layer"]
         self._run_individual = module["run_individual"]
         graph_runtime.GraphModule.__init__(self, module)
@@ -181,13 +187,10 @@ class GraphModuleDebug(graph_runtime.GraphModule):
        Time consumed for each execution will be set as debug output.
        """
-        self.debug_datum._time_list = []
+        self.debug_datum._time_list = [
+            [float(t) * 1e-6] for t in self.run_individual(10, 1, 1)
+        ]
         for i, node in enumerate(self.debug_datum.get_graph_nodes()):
-            start_time = datetime.now().time()
-            time_stamp = self._debug_run(i)
-            end_time = datetime.now().time()
-            self.debug_datum._time_list.append([time_stamp, start_time, end_time])
             num_outputs = self.debug_datum.get_graph_node_output_num(node)
             for j in range(num_outputs):
                 out_tensor = self._get_output_by_layer(i, j)
@@ -212,8 +215,13 @@ class GraphModuleDebug(graph_runtime.GraphModule):
             ret = output_tensors[node]
         except:
             node_list = output_tensors.keys()
-            raise RuntimeError("Node " + node + " not found, available nodes are: "
-                               + str(node_list) + ".")
+            raise RuntimeError(
+                "Node "
+                + node
+                + " not found, available nodes are: "
+                + str(node_list)
+                + "."
+            )
         elif isinstance(node, int):
             output_tensors = self.debug_datum._output_tensor_list
             ret = output_tensors[node]
@@ -242,7 +250,9 @@ class GraphModuleDebug(graph_runtime.GraphModule):
         self.debug_datum.display_debug_result()

     def run_individual(self, number, repeat=1, min_repeat_ms=0):
-        self._run_individual(number, repeat, min_repeat_ms)
+        ret = self._run_individual(number, repeat, min_repeat_ms)
+        return ret.strip(",").split(",") if ret else []

     def exit(self):
         """Exits the dump folder and all its contents"""
...
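End to end, the updated Python API reads as follows. A minimal sketch, assuming
`graph_json_str`, `lib`, `params`, and `input_array` came out of a normal TVM
build step (those names are placeholders, not from this commit):

    import tvm
    from tvm.contrib.debugger import debug_runtime

    # Create the debug runtime; dump_root is optional, as documented above.
    mod = debug_runtime.create(graph_json_str, lib, tvm.cpu(), dump_root="/tmp/tvmdbg")
    mod.set_input(**params)
    mod.run(x=input_array)   # per-op times now come from run_individual internally
    # Returns one string of elapsed microseconds per op, parsed from the
    # comma-separated string the C++ side produces.
    per_op_us = mod.run_individual(10, repeat=1, min_repeat_ms=1)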
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,7 +24,9 @@
 #include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/registry.h>
 #include <tvm/runtime/ndarray.h>
+
 #include <chrono>
+#include <sstream>
 #include "../graph_runtime.h"

 namespace tvm {
@@ -39,40 +41,23 @@ namespace runtime {
 class GraphRuntimeDebug : public GraphRuntime {
  public:
   /*!
-   * \brief Run each operation and get the output.
-   * \param index The index of op which needs to be run.
-   * \return the elapsed time.
-   */
-  double DebugRun(size_t index) {
-    CHECK(index < op_execs_.size());
-    TVMContext ctx = data_entry_[entry_id(index, 0)]->ctx;
-    auto tbegin = std::chrono::high_resolution_clock::now();
-    if (op_execs_[index]) {
-      op_execs_[index]();
-    }
-    TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
-    auto tend = std::chrono::high_resolution_clock::now();
-    double time = std::chrono::duration_cast<std::chrono::duration<double> >(
-        tend - tbegin).count();
-    return time;
-  }
-
-  /*!
-   * \brief Run each operation in the graph and print out the runtime per op.
+   * \brief Run each operation in the graph and get the time per op for all ops.
    * \param number The number of times to run this function for taking average.
    * \param repeat The number of times to repeat the measurement.
-          In total, the function will be invoked (1 + number x repeat) times,
-          where the first one is warmed up and will be discarded in case
-          there is lazy initialization.
+   *        In total, the function will be invoked (1 + number x repeat) times,
+   *        where the first one is warmed up and will be discarded in case
+   *        there is lazy initialization.
    * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
-          By default, one `repeat` contains `number` runs. If this parameter is set,
-          the parameters `number` will be dynamically adjusted to meet the
-          minimum duration requirement of one `repeat`.
+   *        By default, one `repeat` contains `number` runs. If this parameter is set,
+   *        the parameter `number` will be dynamically adjusted to meet the
+   *        minimum duration requirement of one `repeat`.
+   * \return Comma separated string containing the elapsed time per op for the last
+   *         iteration only, because returning a long string over rpc can be expensive.
    */
-  void RunIndividual(int number, int repeat, int min_repeat_ms) {
+  std::string RunIndividual(int number, int repeat, int min_repeat_ms) {
     // warmup run
     GraphRuntime::Run();
+    std::ostringstream os;
     std::vector<double> time_per_op(op_execs_.size(), 0);
     for (int i = 0; i < repeat; ++i) {
       std::chrono::time_point<
@@ -96,7 +81,7 @@ class GraphRuntimeDebug : public GraphRuntime {
           auto op_tend = std::chrono::high_resolution_clock::now();
           double op_duration = std::chrono::duration_cast<
              std::chrono::duration<double> >(op_tend - op_tbegin).count();
-          time_per_op[index] += op_duration * 1000;  // ms
+          time_per_op[index] += op_duration * 1e6;   // us
         }
       }
     }
@@ -105,16 +90,20 @@ class GraphRuntimeDebug : public GraphRuntime {
             (tend - tbegin).count() * 1000;
       } while (duration_ms < min_repeat_ms);

-      LOG(INFO) << "Repeat: " << i;
+      LOG(INFO) << "Iteration: " << i;
       int op = 0;
       for (size_t index = 0; index < time_per_op.size(); index++) {
         if (op_execs_[index]) {
           time_per_op[index] /= number;
           LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
-                    << time_per_op[index] << " ms/iter";
+                    << time_per_op[index] << " us/iter";
         }
       }
     }
+    for (size_t index = 0; index < time_per_op.size(); index++) {
+      os << time_per_op[index] << ",";
+    }
+    return os.str();
   }

   /*!
@@ -182,11 +171,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
     const std::string& name,
     const std::shared_ptr<ModuleNode>& sptr_to_self) {
   // return member functions during query.
-  if (name == "debug_run") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        *rv = this->DebugRun(static_cast<size_t>(args[0].operator int64_t()));
-      });
-  } else if (name == "get_output_by_layer") {
+  if (name == "get_output_by_layer") {
     return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
         *rv = this->GetOutputByLayer(args[0], args[1]);
       });
@@ -206,7 +191,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
         CHECK_GT(number, 0);
         CHECK_GT(repeat, 0);
         CHECK_GE(min_repeat_ms, 0);
-        this->RunIndividual(number, repeat, min_repeat_ms);
+        *rv = this->RunIndividual(number, repeat, min_repeat_ms);
       });
   } else {
     return GraphRuntime::GetFunction(name, sptr_to_self);
...
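The new return convention is the only wire-format subtlety here: RunIndividual
serializes the per-op times as "<us>," fragments, and the Python wrapper shown
earlier undoes that on the other side of the RPC boundary. A quick sketch of
the round trip (the timing values are invented):

    # As produced by `os << time_per_op[index] << ","` on the C++ side.
    ret = "12.5,0,873.2,"
    # Mirrors the Python wrapper: strip the trailing comma, then split.
    times = ret.strip(",").split(",") if ret else []
    assert times == ["12.5", "0", "873.2"]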
@@ -100,9 +100,6 @@ def test_graph_simple():
         out = mod.get_output(0, tvm.nd.empty((n,)))
         np.testing.assert_equal(out.asnumpy(), a + 1)

-        #test individual run
-        mod.run_individual(20, 2, 1)
-
         mod.exit()
         #verify dump root delete after cleanup
         assert(not os.path.exists(directory))
@@ -129,7 +126,6 @@ def test_graph_simple():
         mod.run(x=tvm.nd.array(a, ctx))
         out = tvm.nd.empty((n,), ctx=ctx)
         out = mod.get_output(0, out)
-        mod.run_individual(20, 2, 1)
         np.testing.assert_equal(out.asnumpy(), a + 1)
         check_verify()
...