Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
0f1e0ff0
Commit
0f1e0ff0
authored
Oct 22, 2017
by
Tianqi Chen
Committed by
GitHub
Oct 22, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[PASS] More robust UnrollLoop configuration (#576)
parent
69759c0c
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
88 additions
and
45 deletions
+88
-45
NEWS.md
+4
-1
python/tvm/build_module.py
+6
-5
src/pass/unroll_loop.cc
+63
-20
tests/python/unittest/test_pass_unroll.py
+5
-2
topi/recipe/conv/depthwise_conv2d_test.py
+2
-5
topi/recipe/conv/test_conv2d_hwcn_map.py
+1
-2
topi/recipe/gemm/cuda_gemm_square.py
+3
-3
topi/recipe/rnn/matexp.py
+0
-1
topi/tests/python/test_topi_conv2d_hwcn.py
+2
-3
topi/tests/python/test_topi_conv2d_nchw.py
+2
-3
No files found.
NEWS.md
View file @
0f1e0ff0
...
...
@@ -3,8 +3,11 @@ TVM Change Log
This file records the changes in TVM library in reverse chronological order.
## 0.1rc
## Ongoing version
-
UnrollLoop: more robust version of loop unrolling that counts the maximum number of steps that can be unrolled.
## 0.1rc
-
Language runtime
-
python
-
javascript
...
...
python/tvm/build_module.py
View file @
0f1e0ff0
...
...
@@ -28,7 +28,7 @@ class BuildConfig(object):
current
=
None
defaults
=
{
"auto_unroll_max_step"
:
0
,
"auto_unroll_m
in_depth"
:
1
,
"auto_unroll_m
ax_depth"
:
4
,
"unroll_explicit"
:
True
,
"detect_global_barrier"
:
False
,
"offset_factor"
:
0
,
...
...
@@ -72,10 +72,11 @@ def build_config(**kwargs):
Parameters
----------
auto_unroll_max_step: int, default=0
Threshold of loop extent to be automatically unrolled.
Threshold of number of steps in the loop to be automatically unrolled.
This takes inner loop count into consideration.
auto_unroll_m
in_depth: int, default=1
The m
inimum loop nest level before the loop
can be automatically unrolled.
auto_unroll_m
ax_depth: int, default=4
The m
aximum nested level of loops that
can be automatically unrolled.
unroll_explicit: bool, default=True
Whether explicitly unroll the loop, if set false, the unroll hint will
...
...
@@ -221,7 +222,7 @@ def lower(sch,
stmt
=
ir_pass
.
UnrollLoop
(
stmt
,
cfg
.
auto_unroll_max_step
,
cfg
.
auto_unroll_m
in
_depth
,
cfg
.
auto_unroll_m
ax
_depth
,
cfg
.
unroll_explicit
)
for
f
in
lower_phase1
:
stmt
=
f
(
stmt
)
...
...
src/pass/unroll_loop.cc
View file @
0f1e0ff0
...
...
@@ -18,15 +18,16 @@ namespace ir {
class
LoopUnroller
:
public
IRMutator
{
public
:
explicit
LoopUnroller
(
int
auto_max_step
,
int
auto_m
in
_depth
,
int
auto_m
ax
_depth
,
bool
explicit_unroll
)
:
auto_max_step_
(
auto_max_step
),
auto_m
in_depth_
(
auto_min
_depth
),
auto_m
ax_depth_
(
auto_max
_depth
),
explicit_unroll_
(
explicit_unroll
)
{
}
Stmt
Mutate_
(
const
For
*
op
,
const
Stmt
&
s
)
{
Stmt
stmt
=
s
;
Stmt
stmt
=
IRMutator
::
Mutate_
(
op
,
s
);
op
=
stmt
.
as
<
For
>
();
// constant folding.
Expr
extent
=
ir
::
Simplify
(
op
->
extent
);
const
IntImm
*
v1
=
extent
.
as
<
IntImm
>
();
...
...
@@ -38,15 +39,27 @@ class LoopUnroller : public IRMutator {
if
(
v2
!=
nullptr
)
{
value
=
static_cast
<
int
>
(
v2
->
value
);
}
bool
auto_unroll
=
(
op
->
for_type
==
ForType
::
Serial
&&
value
>=
0
&&
value
<=
auto_max_step_
&&
loop_depth_
>=
auto_min_depth_
);
// condition for auto unroll
bool
auto_unroll
=
(
op
->
for_type
==
ForType
::
Serial
&&
normal_loop_depth_
==
0
&&
value
>=
0
&&
unroll_depth_
<=
auto_max_depth_
&&
value
*
step_count_
<=
auto_max_step_
);
if
(
op
->
for_type
==
ForType
::
Unrolled
)
{
CHECK_GE
(
value
,
0
)
<<
"Cannot unroll non-constant loop"
;
auto_unroll
=
true
;
}
if
(
auto_unroll
)
{
step_count_
*=
value
;
unroll_depth_
+=
1
;
}
else
{
normal_loop_depth_
+=
1
;
}
if
(
auto_unroll
&&
explicit_unroll_
)
{
using
arith
::
ComputeExpr
;
if
(
value
==
0
)
return
Evaluate
::
make
(
0
);
...
...
@@ -65,42 +78,72 @@ class LoopUnroller : public IRMutator {
unrolled
=
step
;
}
}
++
loop_depth_
;
Stmt
ret
=
this
->
Mutate
(
unrolled
);
--
loop_depth_
;
return
ret
;
return
unrolled
;
}
else
{
++
loop_depth_
;
Stmt
ret
=
IRMutator
::
Mutate_
(
op
,
stmt
);
if
(
auto_unroll
)
{
op
=
ret
.
as
<
For
>
();
if
(
op
->
for_type
!=
ForType
::
Unrolled
)
{
ret
=
For
::
make
(
ret
urn
For
::
make
(
op
->
loop_var
,
op
->
min
,
op
->
extent
,
ForType
::
Unrolled
,
op
->
device_api
,
op
->
body
);
}
}
--
loop_depth_
;
return
ret
;
return
stmt
;
}
}
Stmt
Mutate_
(
const
Store
*
op
,
const
Stmt
&
stmt
)
final
{
++
step_count_
;
return
IRMutator
::
Mutate_
(
op
,
stmt
);
}
Stmt
Mutate_
(
const
Evaluate
*
op
,
const
Stmt
&
stmt
)
final
{
++
step_count_
;
return
IRMutator
::
Mutate_
(
op
,
stmt
);
}
Stmt
Mutate_
(
const
Block
*
op
,
const
Stmt
&
stmt
)
final
{
Stmt
first
=
this
->
Mutate
(
op
->
first
);
// cleanup state
int
step_count
=
step_count_
;
int
unroll_depth
=
unroll_depth_
;
int
normal_loop_depth
=
normal_loop_depth_
;
step_count_
=
0
;
unroll_depth_
=
0
;
normal_loop_depth_
=
0
;
// work on rest part
Stmt
rest
=
this
->
Mutate
(
op
->
rest
);
step_count_
+=
step_count
;
normal_loop_depth_
=
std
::
max
(
normal_loop_depth
,
normal_loop_depth_
);
unroll_depth_
=
std
::
max
(
unroll_depth_
,
unroll_depth
);
if
(
first
.
same_as
(
op
->
first
)
&&
rest
.
same_as
(
op
->
rest
))
{
return
stmt
;
}
else
{
return
Block
::
make
(
first
,
rest
);
}
}
private
:
// maximum number of step to perform auto unroll.
int
auto_max_step_
;
int
auto_m
in
_depth_
;
int
auto_m
ax
_depth_
;
bool
explicit_unroll_
;
int
loop_depth_
{
0
};
// Number of normal loops in scope
int
normal_loop_depth_
{
0
};
// number of unrolled cases in current scope.
int
unroll_depth_
{
0
};
// Number of total steps unrolled
int
step_count_
{
0
};
};
Stmt
UnrollLoop
(
Stmt
stmt
,
int
auto_max_step
,
int
auto_m
in
_depth
,
int
auto_m
ax
_depth
,
bool
explicit_unroll
)
{
Stmt
ret
=
LoopUnroller
(
auto_max_step
,
auto_m
in
_depth
,
auto_m
ax
_depth
,
explicit_unroll
).
Mutate
(
stmt
);
if
(
!
ret
.
same_as
(
stmt
))
{
return
ConvertSSA
(
ret
);
...
...
tests/python/unittest/test_pass_unroll.py
View file @
0f1e0ff0
...
...
@@ -14,11 +14,14 @@ def test_unroll_loop():
tvm
.
make
.
Load
(
dtype
,
Ab
.
data
,
i
)
+
1
,
j
+
1
)))
assert
isinstance
(
stmt
,
tvm
.
stmt
.
For
)
ret
=
tvm
.
ir_pass
.
UnrollLoop
(
stmt
,
2
,
0
,
True
)
ret
=
tvm
.
ir_pass
.
UnrollLoop
(
stmt
,
16
,
8
,
True
)
assert
not
isinstance
(
ret
,
tvm
.
stmt
.
For
)
ret
=
tvm
.
ir_pass
.
UnrollLoop
(
stmt
,
4
,
0
,
False
)
ret
=
tvm
.
ir_pass
.
UnrollLoop
(
stmt
,
15
,
8
,
True
)
assert
isinstance
(
ret
,
tvm
.
stmt
.
For
)
ret
=
tvm
.
ir_pass
.
UnrollLoop
(
stmt
,
16
,
8
,
False
)
assert
isinstance
(
ret
,
tvm
.
stmt
.
For
)
assert
ret
.
for_type
==
tvm
.
stmt
.
For
.
Unrolled
if
__name__
==
"__main__"
:
test_unroll_loop
()
topi/recipe/conv/depthwise_conv2d_test.py
View file @
0f1e0ff0
...
...
@@ -112,8 +112,7 @@ def test_depthwise_conv2d_nchw():
print
(
"success"
)
for
device
in
[
'cuda'
,
'opencl'
,
'rocm'
]:
with
tvm
.
build_config
(
auto_unroll_max_step
=
32
,
auto_unroll_min_depth
=
0
,
with
tvm
.
build_config
(
auto_unroll_max_step
=
128
,
unroll_explicit
=
device
==
'rocm'
,
detect_global_barrier
=
False
,
restricted_func
=
True
):
...
...
@@ -202,9 +201,7 @@ def test_depthwise_conv2d_nhwc():
print
(
"success"
)
for
device
in
[
'cuda'
,
'opencl'
,
'rocm'
]:
with
tvm
.
build_config
(
auto_unroll_max_step
=
32
,
auto_unroll_min_depth
=
0
,
unroll_explicit
=
device
==
'rocm'
,
with
tvm
.
build_config
(
auto_unroll_max_step
=
128
,
detect_global_barrier
=
False
,
restricted_func
=
True
):
check_device
(
device
)
...
...
topi/recipe/conv/test_conv2d_hwcn_map.py
View file @
0f1e0ff0
...
...
@@ -60,8 +60,7 @@ def test_conv2d_hwcn_map():
w
=
tvm
.
nd
.
array
(
w_np
,
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
B
.
shape
),
dtype
=
B
.
dtype
),
ctx
)
c
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
C
.
shape
),
dtype
=
C
.
dtype
),
ctx
)
with
tvm
.
build_config
(
auto_unroll_max_step
=
32
,
auto_unroll_min_depth
=
0
,
with
tvm
.
build_config
(
auto_unroll_max_step
=
128
,
unroll_explicit
=
device
==
'rocm'
):
func1
=
tvm
.
build
(
s1
,
[
A
,
W
,
B
],
device
)
func1
(
a
,
w
,
b
)
...
...
topi/recipe/gemm/cuda_gemm_square.py
View file @
0f1e0ff0
...
...
@@ -80,6 +80,7 @@ def test_gemm():
s
[
CC
]
.
reorder
(
ko
,
kt
,
ki
,
yo
,
xo
)
s
[
AA
]
.
compute_at
(
s
[
CC
],
ko
)
s
[
BB
]
.
compute_at
(
s
[
CC
],
ko
)
s
[
CC
]
.
unroll
(
kt
)
s
[
AL
]
.
compute_at
(
s
[
CC
],
kt
)
s
[
BL
]
.
compute_at
(
s
[
CC
],
kt
)
# Schedule for A's shared memory load
...
...
@@ -125,9 +126,8 @@ def test_gemm():
GFLOPS
=
num_flops
/
(
t
*
1e3
)
/
1e6
print
(
"average time cost of
%
d runs =
%
g ms,
%
g GFLOPS."
%
(
num_runs
,
t
*
1e3
,
GFLOPS
))
for
device
in
[
"cuda"
,
"opencl"
,
"rocm"
]:
with
tvm
.
build_config
(
auto_unroll_max_step
=
32
,
auto_unroll_min_depth
=
0
,
for
device
in
[
"cuda"
,
"opencl"
,
"rocm"
,
"nvptx"
]:
with
tvm
.
build_config
(
auto_unroll_max_step
=
128
,
unroll_explicit
=
(
device
!=
"cuda"
)):
check_device
(
device
)
...
...
topi/recipe/rnn/matexp.py
View file @
0f1e0ff0
...
...
@@ -112,7 +112,6 @@ def rnn_matexp():
def
check_device
(
target
):
with
tvm
.
build_config
(
detect_global_barrier
=
detect_global_barrier
,
auto_unroll_min_depth
=
2
,
auto_unroll_max_step
=
128
,
unroll_explicit
=
False
):
f
=
tvm
.
build
(
s
,
[
s_scan
,
Whh
],
target
)
...
...
topi/tests/python/test_topi_conv2d_hwcn.py
View file @
0f1e0ff0
...
...
@@ -39,9 +39,8 @@ def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, p
w
=
tvm
.
nd
.
array
(
w_np
,
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
B
.
shape
),
dtype
=
B
.
dtype
),
ctx
)
c
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
C
.
shape
),
dtype
=
C
.
dtype
),
ctx
)
with
tvm
.
build_config
(
auto_unroll_max_step
=
32
,
auto_unroll_min_depth
=
0
,
unroll_explicit
=
device
==
'rocm'
):
with
tvm
.
build_config
(
auto_unroll_max_step
=
128
,
unroll_explicit
=
(
device
!=
"cuda"
)):
func1
=
tvm
.
build
(
s1
,
[
A
,
W
,
B
],
device
)
func2
=
tvm
.
build
(
s2
,
[
A
,
W
,
C
],
device
)
func1
(
a
,
w
,
b
)
...
...
topi/tests/python/test_topi_conv2d_nchw.py
View file @
0f1e0ff0
...
...
@@ -41,9 +41,8 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
w
=
tvm
.
nd
.
array
(
w_np
,
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
B
.
shape
),
dtype
=
B
.
dtype
),
ctx
)
c
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
C
.
shape
),
dtype
=
C
.
dtype
),
ctx
)
with
tvm
.
build_config
(
auto_unroll_max_step
=
32
,
auto_unroll_min_depth
=
0
,
unroll_explicit
=
device
==
'rocm'
):
with
tvm
.
build_config
(
auto_unroll_max_step
=
128
,
unroll_explicit
=
(
device
!=
"cuda"
)):
func1
=
tvm
.
build
(
s1
,
[
A
,
W
,
B
],
device
)
func2
=
tvm
.
build
(
s2
,
[
A
,
W
,
C
],
device
)
func1
(
a
,
w
,
b
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment