Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
b1188485
Commit
b1188485
authored
Jan 01, 2019
by
Lianmin Zheng
Committed by
Tianqi Chen
Jan 01, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[RUNTIME] Add min_repeat_ms to time_evaluator (#2200)
parent
1e78d41c
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
189 additions
and
111 deletions
+189
-111
python/tvm/autotvm/measure/measure.py
+4
-2
python/tvm/autotvm/measure/measure_methods.py
+47
-62
python/tvm/module.py
+20
-8
src/runtime/rpc/rpc_module.cc
+5
-4
src/runtime/rpc/rpc_session.cc
+29
-7
src/runtime/rpc/rpc_session.h
+35
-7
tests/python/unittest/test_autotvm_measure.py
+1
-21
tests/python/unittest/test_runtime_measure.py
+48
-0
No files found.
python/tvm/autotvm/measure/measure.py
View file @
b1188485
...
@@ -187,8 +187,10 @@ def measure_option(builder, runner):
...
@@ -187,8 +187,10 @@ def measure_option(builder, runner):
Note
Note
----
----
To make measurement results accurate, you should pick the correct value for the argument
To make measurement results accurate, you should pick the correct value for the argument
`number` and `repeat` in Runner(). Using `min_repeat_ms` can dynamically adjusts `number`,
`number` and `repeat` in Runner(). Some devices need a certain minimum running time to
so it is recommended. The typical value for NVIDIA GPU is 100 ms.
"warm up," such as GPUs that need time to reach a performance power state.
Using `min_repeat_ms` can dynamically adjust `number`, so it is recommended.
The typical value for NVIDIA GPU is 150 ms.
"""
"""
from
.measure_methods
import
LocalBuilder
,
LocalRunner
from
.measure_methods
import
LocalBuilder
,
LocalRunner
...
...
python/tvm/autotvm/measure/measure_methods.py
View file @
b1188485
...
@@ -140,20 +140,22 @@ class RPCRunner(Runner):
...
@@ -140,20 +140,22 @@ class RPCRunner(Runner):
The host address of RPC Tracker
The host address of RPC Tracker
port: int
port: int
The port of RPC Tracker
The port of RPC Tracker
number : int, optional
number: int
Number of times to do measurement for tasking average
The number of times to run the generated code for taking average.
We call these runs one `repeat` of measurement.
repeat : int, optional
repeat : int, optional
N
umber of times to repeat the measurement.
The n
umber of times to repeat the measurement.
In total, the generated code will be run (1 + number x repeat) times,
In total, the generated code will be run (1 + number x repeat) times,
where the first one is warm up. The returned result contains `repeat` costs,
where the first "1" is warm up and will be discarded.
min_repeat_ms : float, optional
The returned result contains `repeat` costs,
Minimum duration of a timer measurement in milliseconds.
each of which is an average of `number` costs.
When the run time of a measurement trial falls below this time, the
min_repeat_ms: int, optional
`number` parameter will be automatically increased.
The minimum duration of one `repeat` in milliseconds.
Set this to improve the accuracy of perf measurement, e.g., when timers
By default, one `repeat` contains `number` runs. If this parameter is set,
are not precise enough to capture short-running tasks. This parameter is
the parameter `number` will be dynamically adjusted to meet the
also critical when devices need a certain minimum running time to "warm
minimum duration requirement of one `repeat`.
up," such as GPUs that need time to reach a performance power state.
i.e., when the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
cooldown_interval: float, optional
cooldown_interval: float, optional
The cool down interval between two measurements.
The cool down interval between two measurements.
check_correctness: bool, optional
check_correctness: bool, optional
...
@@ -177,7 +179,6 @@ class RPCRunner(Runner):
...
@@ -177,7 +179,6 @@ class RPCRunner(Runner):
self
.
number
=
number
self
.
number
=
number
self
.
repeat
=
repeat
self
.
repeat
=
repeat
self
.
min_repeat_ms
=
min_repeat_ms
self
.
min_repeat_ms
=
min_repeat_ms
self
.
cur_number
=
number
self
.
ref_input
=
None
self
.
ref_input
=
None
self
.
ref_output
=
None
self
.
ref_output
=
None
...
@@ -188,7 +189,6 @@ class RPCRunner(Runner):
...
@@ -188,7 +189,6 @@ class RPCRunner(Runner):
def
set_task
(
self
,
task
):
def
set_task
(
self
,
task
):
self
.
task
=
task
self
.
task
=
task
self
.
cur_number
=
self
.
number
if
check_remote
(
task
.
target
,
self
.
key
,
self
.
host
,
self
.
port
):
if
check_remote
(
task
.
target
,
self
.
key
,
self
.
host
,
self
.
port
):
logger
.
info
(
"Get devices for measurement successfully!"
)
logger
.
info
(
"Get devices for measurement successfully!"
)
...
@@ -240,8 +240,9 @@ class RPCRunner(Runner):
...
@@ -240,8 +240,9 @@ class RPCRunner(Runner):
ret
=
self
.
executor
.
submit
(
run_through_rpc
,
ret
=
self
.
executor
.
submit
(
run_through_rpc
,
measure_inp
,
measure_inp
,
build_res
,
build_res
,
self
.
cur_
number
,
self
.
number
,
self
.
repeat
,
self
.
repeat
,
self
.
min_repeat_ms
,
self
.
cooldown_interval
,
self
.
cooldown_interval
,
remote_args
,
remote_args
,
self
.
ref_input
,
self
.
ref_input
,
...
@@ -256,32 +257,6 @@ class RPCRunner(Runner):
...
@@ -256,32 +257,6 @@ class RPCRunner(Runner):
else
:
else
:
results
.
append
(
res
)
results
.
append
(
res
)
# If some runs were too fast, do remeasure for them
# to meet the requirement of `min_repeat_ms`
remeasure
=
np
.
zeros
((
len
(
measure_inputs
),),
dtype
=
np
.
bool
)
pre_number
=
next_number
=
self
.
cur_number
min_repeat_duration
=
self
.
min_repeat_ms
/
1000.0
for
i
,
res
in
enumerate
(
results
):
if
res
.
error_no
==
MeasureErrorNo
.
NO_ERROR
:
if
np
.
mean
(
res
.
costs
)
*
pre_number
<=
min_repeat_duration
:
next_number
=
max
(
next_number
,
int
(
np
.
ceil
(
min_repeat_duration
/
np
.
mean
(
res
.
costs
))))
remeasure
[
i
]
=
True
if
pre_number
!=
next_number
:
self
.
cur_number
=
next_number
msg
=
"increasing number to
%
d"
%
self
.
cur_number
logger
.
info
(
msg
)
re_measure_inputs
=
[
x
for
i
,
x
in
enumerate
(
measure_inputs
)
if
remeasure
[
i
]]
re_build_results
=
[
x
for
i
,
x
in
enumerate
(
build_results
)
if
remeasure
[
i
]]
re_res
=
self
.
run
(
re_measure_inputs
,
re_build_results
)
ct
=
0
for
i
,
rerun
in
enumerate
(
remeasure
):
if
rerun
:
results
[
i
]
=
re_res
[
ct
]
ct
+=
1
return
results
return
results
class
LocalRunner
(
RPCRunner
):
class
LocalRunner
(
RPCRunner
):
...
@@ -291,21 +266,22 @@ class LocalRunner(RPCRunner):
...
@@ -291,21 +266,22 @@ class LocalRunner(RPCRunner):
----------
----------
timeout: float
timeout: float
The timeout of a compilation
The timeout of a compilation
number : int, optional
number: int
Number of times to do measurement for tasking average
The number of times to run the generated code for taking average.
We call these runs one `repeat` of measurement.
repeat : int, optional
repeat : int, optional
N
umber of times to repeat the measurement.
The n
umber of times to repeat the measurement.
In total, the generated code will be run (1 + number x repeat) times,
In total, the generated code will be run (1 + number x repeat) times,
where the first one is warm up
. The returned result contains `repeat` costs,
where the first one is warm up
and will be discarded.
each of which is the average of `number` test run.
The returned result contains `repeat` costs,
min_repeat_ms : float, optional
each of which is an average of `number` costs.
Minimum duration of a timer measurement in milliseconds.
min_repeat_ms: int, optional
When the run time of a measurement trial falls below this time, the
The minimum duration of one `repeat` in milliseconds.
`number` parameter will be automatically increased.
By default, one `repeat` contains `number` runs. If this parameter is set,
Set this to improve the accuracy of perf measurement, e.g., when timers
the parameter `number` will be dynamically adjusted to meet the
are not precise enough to capture short-running tasks. This parameter is
minimum duration requirement of one `repeat`.
also critical when devices need a certain minimum running time to "warm
i.e., when the run time of one `repeat` falls below this time, the `number` parameter
up," such as GPUs that need time to reach a performance power state
.
will be automatically increased
.
cooldown_interval: float, optional
cooldown_interval: float, optional
The cool down interval between two measurements.
The cool down interval between two measurements.
check_correctness: bool, optional
check_correctness: bool, optional
...
@@ -416,7 +392,7 @@ def android_ndk_build_func(measure_input, tmp_dir, **kwargs):
...
@@ -416,7 +392,7 @@ def android_ndk_build_func(measure_input, tmp_dir, **kwargs):
def
run_through_rpc
(
measure_input
,
build_result
,
def
run_through_rpc
(
measure_input
,
build_result
,
number
,
repeat
,
cooldown_interval
,
number
,
repeat
,
min_repeat_ms
,
cooldown_interval
,
remote_args
,
ref_input
=
None
,
ref_output
=
None
):
remote_args
,
ref_input
=
None
,
ref_output
=
None
):
"""Run a generated library through rpc
"""Run a generated library through rpc
...
@@ -426,13 +402,22 @@ def run_through_rpc(measure_input, build_result,
...
@@ -426,13 +402,22 @@ def run_through_rpc(measure_input, build_result,
The raw measure input
The raw measure input
build_result: BuildResult
build_result: BuildResult
The result returned from Builder. This contains the path to the generated library.
The result returned from Builder. This contains the path to the generated library.
number : int, optional
number: int
Number of times to do measurement for tasking average
The number of times to run the generated code for taking average.
We call these runs one `repeat` of measurement.
repeat : int, optional
repeat : int, optional
N
umber of times to repeat the measurement.
The n
umber of times to repeat the measurement.
In total, the generated code will be run (1 + number x repeat) times,
In total, the generated code will be run (1 + number x repeat) times,
where the first one is warm up. The returned result contains `repeat` costs,
where the first one is warm up and will be discarded.
each of which is the average of `number` test run.
The returned result contains `repeat` costs,
each of which is an average of `number` costs.
min_repeat_ms: int, optional
The minimum duration of one `repeat` in milliseconds.
By default, one `repeat` contains `number` runs. If this parameter is set,
the parameter `number` will be dynamically adjusted to meet the
minimum duration requirement of one `repeat`.
i.e., when the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
cooldown_interval: float
cooldown_interval: float
The cool down interval between two measurements
The cool down interval between two measurements
remote_args: Tuple
remote_args: Tuple
...
@@ -454,14 +439,14 @@ def run_through_rpc(measure_input, build_result,
...
@@ -454,14 +439,14 @@ def run_through_rpc(measure_input, build_result,
func
=
remote
.
load_module
(
os
.
path
.
split
(
build_result
.
filename
)[
1
])
func
=
remote
.
load_module
(
os
.
path
.
split
(
build_result
.
filename
)[
1
])
ctx
=
remote
.
context
(
str
(
measure_input
.
target
),
0
)
ctx
=
remote
.
context
(
str
(
measure_input
.
target
),
0
)
time_f
=
func
.
time_evaluator
(
time_f
=
func
.
time_evaluator
(
func
.
entry_name
,
ctx
,
number
=
number
,
repeat
=
repeat
)
func
.
entry_name
,
ctx
,
number
=
number
,
repeat
=
repeat
,
min_repeat_ms
=
min_repeat_ms
)
# set input
# set input
if
ref_input
:
if
ref_input
:
args
=
[
nd
.
array
(
x
,
ctx
=
ctx
)
for
x
in
ref_input
]
args
=
[
nd
.
array
(
x
,
ctx
=
ctx
)
for
x
in
ref_input
]
else
:
else
:
# create empty arrays on the remote device and copy them once.
# create empty arrays on the remote device and copy them once.
# This can avoid some memory issues that make the measurment results unreliable.
# This can avoid some memory issues that make the measur
e
ment results unreliable.
args
=
[
nd
.
empty
(
x
[
0
],
dtype
=
x
[
1
],
ctx
=
ctx
)
for
x
in
build_result
.
arg_info
]
args
=
[
nd
.
empty
(
x
[
0
],
dtype
=
x
[
1
],
ctx
=
ctx
)
for
x
in
build_result
.
arg_info
]
args
=
[
nd
.
array
(
x
,
ctx
=
ctx
)
for
x
in
args
]
args
=
[
nd
.
array
(
x
,
ctx
=
ctx
)
for
x
in
args
]
ctx
.
sync
()
ctx
.
sync
()
...
...
python/tvm/module.py
View file @
b1188485
...
@@ -127,7 +127,7 @@ class Module(ModuleBase):
...
@@ -127,7 +127,7 @@ class Module(ModuleBase):
kwargs
.
update
({
'options'
:
[
"-I"
+
path
for
path
in
find_include_path
()]})
kwargs
.
update
({
'options'
:
[
"-I"
+
path
for
path
in
find_include_path
()]})
fcompile
(
file_name
,
files
,
**
kwargs
)
fcompile
(
file_name
,
files
,
**
kwargs
)
def
time_evaluator
(
self
,
func_name
,
ctx
,
number
,
repeat
=
1
):
def
time_evaluator
(
self
,
func_name
,
ctx
,
number
=
10
,
repeat
=
1
,
min_repeat_ms
=
0
):
"""Get an evaluator that measures time cost of running function.
"""Get an evaluator that measures time cost of running function.
Parameters
Parameters
...
@@ -139,26 +139,38 @@ class Module(ModuleBase):
...
@@ -139,26 +139,38 @@ class Module(ModuleBase):
The context we should run this function on.
The context we should run this function on.
number: int
number: int
The number of steps used in measuring each time interval
The number of times to run this function for taking average.
We call these runs one `repeat` of measurement.
repeat: int, optional
repeat: int, optional
Number of times to run the timer measurement
The number of times to repeat the measurement.
If repeat equals 3, then we will get 3 numbers in the ProfileResult.
In total, the function will be invoked (1 + number x repeat) times,
where the first one is warm up and will be discarded.
The returned result contains `repeat` costs,
each of which is an average of `number` costs.
min_repeat_ms: int, optional
The minimum duration of one `repeat` in milliseconds.
By default, one `repeat` contains `number` runs. If this parameter is set,
the parameter `number` will be dynamically adjusted to meet the
minimum duration requirement of one `repeat`.
i.e., when the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
Note
Note
----
----
The function will be invoked
repeat * number + 1
times,
The function will be invoked
(1 + number x repeat)
times,
with the first call discarded in case there is lazy initialization.
with the first call discarded in case there is lazy initialization.
Returns
Returns
-------
-------
ftimer : Function
ftimer : Function
The function that takes same argument as func
The function that takes same argument as func
and returns a ProfileResult.
and return a float representing seconds per function call
.
The ProfileResult reports `repeat` time costs in seconds
.
"""
"""
try
:
try
:
feval
=
_RPCTimeEvaluator
(
feval
=
_RPCTimeEvaluator
(
self
,
func_name
,
ctx
.
device_type
,
ctx
.
device_id
,
number
,
repeat
)
self
,
func_name
,
ctx
.
device_type
,
ctx
.
device_id
,
number
,
repeat
,
min_repeat_ms
)
def
evaluator
(
*
args
):
def
evaluator
(
*
args
):
"""Internal wrapped evaluator."""
"""Internal wrapped evaluator."""
...
...
src/runtime/rpc/rpc_module.cc
View file @
b1188485
...
@@ -124,10 +124,11 @@ class RPCModuleNode final : public ModuleNode {
...
@@ -124,10 +124,11 @@ class RPCModuleNode final : public ModuleNode {
PackedFunc
GetTimeEvaluator
(
const
std
::
string
&
name
,
PackedFunc
GetTimeEvaluator
(
const
std
::
string
&
name
,
TVMContext
ctx
,
TVMContext
ctx
,
int
number
,
int
number
,
int
repeat
)
{
int
repeat
,
int
min_repeat_ms
)
{
RPCFuncHandle
handle
=
GetFuncHandle
(
name
);
RPCFuncHandle
handle
=
GetFuncHandle
(
name
);
if
(
handle
==
nullptr
)
return
PackedFunc
();
if
(
handle
==
nullptr
)
return
PackedFunc
();
handle
=
sess_
->
GetTimeEvaluator
(
handle
,
ctx
,
number
,
repeat
);
handle
=
sess_
->
GetTimeEvaluator
(
handle
,
ctx
,
number
,
repeat
,
min_repeat_ms
);
return
WrapRemote
(
handle
);
return
WrapRemote
(
handle
);
}
}
...
@@ -203,10 +204,10 @@ TVM_REGISTER_GLOBAL("module._RPCTimeEvaluator")
...
@@ -203,10 +204,10 @@ TVM_REGISTER_GLOBAL("module._RPCTimeEvaluator")
ctx
.
device_id
=
args
[
3
];
ctx
.
device_id
=
args
[
3
];
if
(
tkey
==
"rpc"
)
{
if
(
tkey
==
"rpc"
)
{
*
rv
=
static_cast
<
RPCModuleNode
*>
(
m
.
operator
->
())
*
rv
=
static_cast
<
RPCModuleNode
*>
(
m
.
operator
->
())
->
GetTimeEvaluator
(
args
[
1
],
ctx
,
args
[
4
],
args
[
5
]);
->
GetTimeEvaluator
(
args
[
1
],
ctx
,
args
[
4
],
args
[
5
]
,
args
[
6
]
);
}
else
{
}
else
{
*
rv
=
WrapTimeEvaluator
(
*
rv
=
WrapTimeEvaluator
(
m
.
GetFunction
(
args
[
1
],
false
),
ctx
,
args
[
4
],
args
[
5
]);
m
.
GetFunction
(
args
[
1
],
false
),
ctx
,
args
[
4
],
args
[
5
]
,
args
[
6
]
);
}
}
});
});
...
...
src/runtime/rpc/rpc_session.cc
View file @
b1188485
...
@@ -13,6 +13,8 @@
...
@@ -13,6 +13,8 @@
#include <chrono>
#include <chrono>
#include <vector>
#include <vector>
#include <utility>
#include <utility>
#include <cmath>
#include <algorithm>
#include "rpc_session.h"
#include "rpc_session.h"
#include "../../common/ring_buffer.h"
#include "../../common/ring_buffer.h"
...
@@ -1002,9 +1004,9 @@ void RPCSession::CopyFromRemote(void* from,
...
@@ -1002,9 +1004,9 @@ void RPCSession::CopyFromRemote(void* from,
}
}
RPCFuncHandle
RPCSession
::
GetTimeEvaluator
(
RPCFuncHandle
RPCSession
::
GetTimeEvaluator
(
RPCFuncHandle
fhandle
,
TVMContext
ctx
,
int
number
,
int
repeat
)
{
RPCFuncHandle
fhandle
,
TVMContext
ctx
,
int
number
,
int
repeat
,
int
min_repeat_ms
)
{
return
this
->
CallRemote
(
return
this
->
CallRemote
(
RPCCode
::
kGetTimeEvaluator
,
fhandle
,
ctx
,
number
,
repeat
);
RPCCode
::
kGetTimeEvaluator
,
fhandle
,
ctx
,
number
,
repeat
,
min_repeat_ms
);
}
}
// Event handler functions
// Event handler functions
...
@@ -1138,7 +1140,7 @@ void RPCNDArrayFree(TVMArgs args, TVMRetValue *rv) {
...
@@ -1138,7 +1140,7 @@ void RPCNDArrayFree(TVMArgs args, TVMRetValue *rv) {
void
RPCGetTimeEvaluator
(
TVMArgs
args
,
TVMRetValue
*
rv
)
{
void
RPCGetTimeEvaluator
(
TVMArgs
args
,
TVMRetValue
*
rv
)
{
PackedFunc
*
pf
=
static_cast
<
PackedFunc
*>
(
args
[
0
].
operator
void
*
());
PackedFunc
*
pf
=
static_cast
<
PackedFunc
*>
(
args
[
0
].
operator
void
*
());
void
*
fhandle
=
new
PackedFunc
(
WrapTimeEvaluator
(
*
pf
,
args
[
1
],
args
[
2
],
args
[
3
]));
void
*
fhandle
=
new
PackedFunc
(
WrapTimeEvaluator
(
*
pf
,
args
[
1
],
args
[
2
],
args
[
3
]
,
args
[
4
]
));
delete
pf
;
delete
pf
;
*
rv
=
fhandle
;
*
rv
=
fhandle
;
}
}
...
@@ -1190,21 +1192,41 @@ void RPCSession::EventHandler::HandlePackedCall() {
...
@@ -1190,21 +1192,41 @@ void RPCSession::EventHandler::HandlePackedCall() {
CHECK_EQ
(
state_
,
kRecvCode
);
CHECK_EQ
(
state_
,
kRecvCode
);
}
}
PackedFunc
WrapTimeEvaluator
(
PackedFunc
pf
,
TVMContext
ctx
,
int
number
,
int
repeat
)
{
PackedFunc
WrapTimeEvaluator
(
PackedFunc
pf
,
auto
ftimer
=
[
pf
,
ctx
,
number
,
repeat
](
TVMArgs
args
,
TVMRetValue
*
rv
)
{
TVMContext
ctx
,
int
number
,
int
repeat
,
int
min_repeat_ms
)
{
auto
ftimer
=
[
pf
,
ctx
,
number
,
repeat
,
min_repeat_ms
](
TVMArgs
args
,
TVMRetValue
*
rv
)
mutable
{
TVMRetValue
temp
;
TVMRetValue
temp
;
std
::
ostringstream
os
;
std
::
ostringstream
os
;
// skip first time call, to activate lazy compilation components.
// skip first time call, to activate lazy compilation components.
pf
.
CallPacked
(
args
,
&
temp
);
pf
.
CallPacked
(
args
,
&
temp
);
DeviceAPI
::
Get
(
ctx
)
->
StreamSync
(
ctx
,
nullptr
);
DeviceAPI
::
Get
(
ctx
)
->
StreamSync
(
ctx
,
nullptr
);
for
(
int
i
=
0
;
i
<
repeat
;
++
i
)
{
for
(
int
i
=
0
;
i
<
repeat
;
++
i
)
{
std
::
chrono
::
time_point
<
std
::
chrono
::
system_clock
,
std
::
chrono
::
nanoseconds
>
tbegin
,
tend
;
double
duration_ms
=
0.0
;
do
{
if
(
duration_ms
>
0.0
)
{
number
=
static_cast
<
int
>
(
std
::
max
((
min_repeat_ms
/
(
duration_ms
/
number
)
+
1
),
number
*
1.618
));
// 1.618 is chosen arbitrarily
}
tbegin
=
std
::
chrono
::
high_resolution_clock
::
now
();
// start timing
// start timing
auto
tbegin
=
std
::
chrono
::
high_resolution_clock
::
now
();
for
(
int
i
=
0
;
i
<
number
;
++
i
)
{
for
(
int
i
=
0
;
i
<
number
;
++
i
)
{
pf
.
CallPacked
(
args
,
&
temp
);
pf
.
CallPacked
(
args
,
&
temp
);
}
}
DeviceAPI
::
Get
(
ctx
)
->
StreamSync
(
ctx
,
nullptr
);
DeviceAPI
::
Get
(
ctx
)
->
StreamSync
(
ctx
,
nullptr
);
auto
tend
=
std
::
chrono
::
high_resolution_clock
::
now
();
tend
=
std
::
chrono
::
high_resolution_clock
::
now
();
duration_ms
=
std
::
chrono
::
duration_cast
<
std
::
chrono
::
duration
<
double
>
>
(
tend
-
tbegin
).
count
()
*
1000
;
}
while
(
duration_ms
<
min_repeat_ms
);
double
speed
=
std
::
chrono
::
duration_cast
<
std
::
chrono
::
duration
<
double
>
>
(
double
speed
=
std
::
chrono
::
duration_cast
<
std
::
chrono
::
duration
<
double
>
>
(
tend
-
tbegin
).
count
()
/
number
;
tend
-
tbegin
).
count
()
/
number
;
os
.
write
(
reinterpret_cast
<
char
*>
(
&
speed
),
sizeof
(
speed
));
os
.
write
(
reinterpret_cast
<
char
*>
(
&
speed
),
sizeof
(
speed
));
...
...
src/runtime/rpc/rpc_session.h
View file @
b1188485
...
@@ -151,14 +151,26 @@ class RPCSession {
...
@@ -151,14 +151,26 @@ class RPCSession {
*
*
* \param fhandle The function handle.
* \param fhandle The function handle.
* \param ctx The ctx to run measurement on.
* \param ctx The ctx to run measurement on.
* \param number How many steps to run in each time evaluation
* \param number The number of times to run this function for taking average.
* \param repeat How many times to repeat the timer
We call these runs one `repeat` of measurement.
* \param repeat The number of times to repeat the measurement.
In total, the function will be invoked (1 + number x repeat) times,
where the first one is warm up and will be discarded.
The returned result contains `repeat` costs,
each of which is an average of `number` costs.
* \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
By default, one `repeat` contains `number` runs. If this parameter is set,
the parameter `number` will be dynamically adjusted to meet the
minimum duration requirement of one `repeat`.
i.e., when the run time of one `repeat` falls below this time,
the `number` parameter will be automatically increased.
* \return A remote timer function
* \return A remote timer function
*/
*/
RPCFuncHandle
GetTimeEvaluator
(
RPCFuncHandle
fhandle
,
RPCFuncHandle
GetTimeEvaluator
(
RPCFuncHandle
fhandle
,
TVMContext
ctx
,
TVMContext
ctx
,
int
number
,
int
number
,
int
repeat
);
int
repeat
,
int
min_repeat_ms
);
/*!
/*!
* \brief Call a remote defined system function with arguments.
* \brief Call a remote defined system function with arguments.
* \param fcode The function code.
* \param fcode The function code.
...
@@ -221,13 +233,29 @@ class RPCSession {
...
@@ -221,13 +233,29 @@ class RPCSession {
};
};
/*!
/*!
* \brief Wrap a timer function
for
a given packed function.
* \brief Wrap a timer function
to measure the time cost of
a given packed function.
* \param f The function argument.
* \param f The function argument.
* \param ctx The context.
* \param ctx The context.
* \param number Number of steps in the inner iteration
* \param number The number of times to run this function for taking average.
* \param repeat How many steps to repeat the time evaluation.
We call these runs one `repeat` of measurement.
* \param repeat The number of times to repeat the measurement.
In total, the function will be invoked (1 + number x repeat) times,
where the first one is warm up and will be discarded.
The returned result contains `repeat` costs,
each of which is an average of `number` costs.
* \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
By default, one `repeat` contains `number` runs. If this parameter is set,
the parameter `number` will be dynamically adjusted to meet the
minimum duration requirement of one `repeat`.
i.e., when the run time of one `repeat` falls below this time,
the `number` parameter will be automatically increased.
* \return f_timer A timer function.
*/
*/
PackedFunc
WrapTimeEvaluator
(
PackedFunc
f
,
TVMContext
ctx
,
int
number
,
int
repeat
);
PackedFunc
WrapTimeEvaluator
(
PackedFunc
f
,
TVMContext
ctx
,
int
number
,
int
repeat
,
int
min_repeat_ms
);
/*!
/*!
* \brief Create a Global RPC module that refers to the session.
* \brief Create a Global RPC module that refers to the session.
...
...
tests/python/unittest/test_autotvm_measure.py
View file @
b1188485
...
@@ -69,29 +69,9 @@ def test_check_correctness():
...
@@ -69,29 +69,9 @@ def test_check_correctness():
callbacks
=
[
_callback_wrong
])
callbacks
=
[
_callback_wrong
])
def test_min_repeat_ms():
    """Tune a sample task locally and check the `min_repeat_ms` requirement.

    A LocalRunner is created with ``number=1`` and ``min_repeat_ms=100``.
    For every measurement that finished without error, the mean cost (in
    seconds) scaled to milliseconds and multiplied by the runner's
    ``cur_number`` must reach at least 100.
    """
    task, _target = get_sample_task()

    # Build the option dict with the builder constructed before the runner.
    local_builder = autotvm.LocalBuilder()
    local_runner = autotvm.LocalRunner(number=1, min_repeat_ms=100)
    measure_option = autotvm.measure_option(builder=local_builder,
                                            runner=local_runner)

    def _callback(tuner, measure_inputs, measure_results):
        """Assert the minimum repeat duration on each successful result."""
        for _inp, result in zip(measure_inputs, measure_results):
            if result.error_no == 0:
                # Only successful measurements carry meaningful costs.
                assert 1000 * np.mean(result.costs) * \
                       measure_option['runner'].cur_number >= 100

    random_tuner = autotvm.tuner.RandomTuner(task)
    random_tuner.tune(n_trial=5,
                      measure_option=measure_option,
                      callbacks=[_callback])
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
basicConfig
(
level
=
logging
.
INFO
)
test_task_tuner_without_measurement
()
test_task_tuner_without_measurement
()
test_check_correctness
()
test_check_correctness
()
test_min_repeat_ms
()
tests/python/unittest/test_runtime_measure.py
0 → 100644
View file @
b1188485
import
time
import
ctypes
import
tvm
from
tvm.contrib.util
import
tempdir
def test_min_repeat_ms():
    """Check that `min_repeat_ms` makes the time evaluator run more calls.

    A packed function is registered that sleeps for 100 ms and appends one
    character to a log file, so the length of the log's first line equals
    the number of calls made so far.
    """
    # Keep the tempdir object bound for the whole test.
    workdir = tempdir()
    log_path = workdir.relpath("log")

    @tvm.register_func
    def my_debug(filename):
        """Sleep for 100 ms, then append one character to the given file."""
        time.sleep(0.1)
        target = ctypes.c_char_p(filename.value).value
        with open(target, "a") as fout:
            fout.write("c")

    def logged_calls():
        """Return the number of characters on the first line of the log."""
        with open(log_path, "r") as fin:
            return len(fin.readline())

    X = tvm.compute((), lambda: tvm.call_packed("my_debug", log_path))
    sched = tvm.create_schedule(X.op)
    func = tvm.build(sched, [X])
    x = tvm.nd.empty((), dtype="int32")

    # Without min_repeat_ms: the test expects exactly two recorded calls.
    plain_timer = func.time_evaluator(
        func.entry_name, tvm.cpu(), number=1, repeat=1)
    plain_timer(x)
    assert logged_calls() == 2

    # With min_repeat_ms=1000 and 100 ms per call, `number` has to grow.
    adaptive_timer = func.time_evaluator(
        func.entry_name, tvm.cpu(), number=1, repeat=1, min_repeat_ms=1000)
    adaptive_timer(x)

    # make sure we get more than 10 calls
    assert logged_calls() > 10 + 2
# Allow running this test file directly as a script.
if __name__ == "__main__":
    test_min_repeat_ms()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment