Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
fc83c7f2
Commit
fc83c7f2
authored
Nov 09, 2018
by
Andrew Tulloch
Committed by
Tianqi Chen
Nov 09, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TVM] [NNPACK] Modernize and improve NNPACK bindings (#2084)
parent
9f441d81
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
216 additions
and
27 deletions
+216
-27
cmake/modules/contrib/NNPack.cmake
+4
-0
python/tvm/contrib/nnpack.py
+112
-8
src/contrib/nnpack/convolution.cc
+0
-0
src/contrib/nnpack/nnpack_utils.cc
+15
-5
src/contrib/nnpack/nnpack_utils.h
+1
-1
tests/lint/pylintrc
+2
-2
tests/python/contrib/test_nnpack.py
+82
-11
No files found.
cmake/modules/contrib/NNPack.cmake
View file @
fc83c7f2
...
...
@@ -9,6 +9,10 @@ if(USE_NNPACK)
include_directories
(
${
PTHREAD_POOL_PATH
}
/include
)
find_library
(
NNPACK_CONTRIB_LIB nnpack
${
NNPACK_PATH
}
/lib
)
find_library
(
NNPACK_PTHREAD_CONTRIB_LIB pthreadpool
${
NNPACK_PATH
}
/lib
)
find_library
(
NNPACK_CPUINFO_CONTRIB_LIB cpuinfo
${
NNPACK_PATH
}
/lib
)
find_library
(
NNPACK_CLOG_CONTRIB_LIB clog
${
NNPACK_PATH
}
/lib
)
list
(
APPEND TVM_RUNTIME_LINKER_LIBS
${
NNPACK_CONTRIB_LIB
}
)
list
(
APPEND TVM_RUNTIME_LINKER_LIBS
${
NNPACK_PTHREAD_CONTRIB_LIB
}
)
list
(
APPEND TVM_RUNTIME_LINKER_LIBS
${
NNPACK_CPUINFO_CONTRIB_LIB
}
)
list
(
APPEND TVM_RUNTIME_LINKER_LIBS
${
NNPACK_CLOG_CONTRIB_LIB
}
)
endif
(
USE_NNPACK
)
python/tvm/contrib/nnpack.py
View file @
fc83c7f2
...
...
@@ -63,14 +63,32 @@ def fully_connected_output(lhs, rhs, nthreads=1):
"tvm.contrib.nnpack.fully_connected_output"
,
ins
[
0
],
ins
[
1
],
outs
[
0
],
nthreads
),
name
=
"C"
)
def
convolution_inference
(
data
,
kernel
,
bias
,
padding
,
stride
,
nthreads
=
1
):
"""Create an extern op to do inference convolution of 3D tensor data and
class ConvolutionAlgorithm:
    """Integer codes selecting the NNPACK convolution algorithm.

    The value is forwarded unchanged as the trailing ``algorithm`` argument
    of the packed NNPACK convolution calls in this module.
    """
    # NOTE(review): the numbering presumably mirrors NNPACK's
    # ``nnp_convolution_algorithm`` enum -- confirm against nnpack.h.
    AUTO = 0
    FFT_8x8 = 1
    FFT_16x16 = 2
    WT_8x8 = 3
    IMPLICIT_GEMM = 4
    DIRECT = 5
    WT_8x8_FP16 = 6
class ConvolutionTransformStrategy:
    """Integer codes for how the kernel transform is obtained."""
    # NOTE(review): presumably mirrors NNPACK's
    # ``nnp_convolution_transform_strategy`` enum -- confirm against nnpack.h.
    COMPUTE = 1
    PRECOMPUTE = 2
def
convolution_inference
(
data
,
kernel
,
bias
,
padding
,
stride
,
nthreads
=
1
,
algorithm
=
ConvolutionAlgorithm
.
AUTO
):
"""Create an extern op to do inference convolution of 4D tensor data and
4D tensor kernel and 1D tensor bias with nnpack.
Parameters
----------
data : Tensor
data
3D tensor input
[input_channels][input_height][input_width] of
data
4D tensor input[batch]
[input_channels][input_height][input_width] of
FP32 elements.
kernel : Tensor
kernel 4D tensor kernel[output_channels][input_channels][kernel_height]
...
...
@@ -88,23 +106,108 @@ def convolution_inference(data, kernel, bias, padding, stride, nthreads=1):
Returns
-------
output : Tensor
output
3D tensor output
[output_channels][output_height][output_width]
output
4D tensor output[batch]
[output_channels][output_height][output_width]
of FP32 elements.
"""
assert
isinstance
(
padding
,
list
)
and
len
(
padding
)
==
4
assert
isinstance
(
stride
,
list
)
and
len
(
stride
)
==
2
_
,
input_height
,
input_width
=
data
.
shape
batch
,
_
,
input_height
,
input_width
=
data
.
shape
output_channels
,
_
,
kernel_height
,
kernel_width
=
kernel
.
shape
output_height
=
(
input_height
+
padding
[
0
]
+
padding
[
1
]
-
kernel_height
)
/
stride
[
0
]
+
1
output_width
=
(
input_width
+
padding
[
0
]
+
padding
[
1
]
-
kernel_width
)
/
stride
[
1
]
+
1
return
_api
.
extern
(
(
output_channels
,
output_height
,
output_width
),
[
data
,
kernel
,
bias
],
(
batch
,
output_channels
,
output_height
,
output_width
),
[
data
,
kernel
,
bias
]
if
bias
is
not
None
else
[
data
,
kernel
],
lambda
ins
,
outs
:
_intrin
.
call_packed
(
"tvm.contrib.nnpack.convolution_inference"
,
ins
[
0
],
ins
[
1
],
ins
[
2
],
"tvm.contrib.nnpack.convolution_inference"
,
ins
[
0
],
ins
[
1
],
ins
[
2
]
if
bias
is
not
None
else
0
,
outs
[
0
],
padding
[
0
],
padding
[
1
],
padding
[
2
],
padding
[
3
],
stride
[
0
],
stride
[
1
],
nthreads
),
name
=
"C"
)
stride
[
0
],
stride
[
1
],
nthreads
,
algorithm
),
name
=
"C"
)
def convolution_inference_without_weight_transform(
        data, transformed_kernel, bias, padding, stride, nthreads=1,
        algorithm=ConvolutionAlgorithm.AUTO):
    """Create an extern op to do inference convolution of 4D tensor data and
    4D pre-transformed tensor kernel and 1D tensor bias with nnpack.

    Parameters
    ----------
    data : Tensor
        data 4D tensor input[batch][input_channels][input_height][input_width] of
        FP32 elements.
    transformed_kernel : Tensor
        transformed_kernel 4D tensor kernel[output_channels][input_channels][tile]
        [tile] of FP32 elements.
    bias : Tensor or None
        bias 1D array bias[output_channels] of FP32 elements. When None, the
        bias input is omitted and 0 is passed to the packed call in its place.
    padding : list
        padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right],
        which indicates the padding around the feature map.
    stride : list
        stride A 2-dim list of [stride_height, stride_width], which indicates
        the stride.
    nthreads : int
        Forwarded unchanged to the packed call.
    algorithm : int
        One of ConvolutionAlgorithm.WT_8x8 or ConvolutionAlgorithm.WT_8x8_FP16.
        The default (AUTO) is rejected by the assertion below, so callers
        must pass this explicitly.

    Returns
    -------
    output : Tensor
        output 4D tensor output[batch][output_channels][output_height][output_width]
        of FP32 elements.
    """
    assert algorithm in (ConvolutionAlgorithm.WT_8x8,
                         ConvolutionAlgorithm.WT_8x8_FP16)
    assert isinstance(padding, list) and len(padding) == 4
    assert isinstance(stride, list) and len(stride) == 2
    batch, _, input_height, input_width = data.shape
    output_channels, _, _, _ = transformed_kernel.shape
    # The transformed kernel no longer carries the spatial kernel size;
    # this op assumes a 3x3 kernel.
    kernel_height, kernel_width = (3, 3)
    # Height uses the vertical padding (top + bottom) ...
    output_height = (input_height + padding[0] + padding[1] - kernel_height) / stride[0] + 1
    # ... and width uses the horizontal padding (left + right).
    output_width = (input_width + padding[2] + padding[3] - kernel_width) / stride[1] + 1

    return _api.extern(
        (batch, output_channels, output_height, output_width),
        [data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel],
        lambda ins, outs: _intrin.call_packed(
            "tvm.contrib.nnpack.convolution_inference_without_weight_transform",
            ins[0],
            ins[1],
            ins[2] if bias is not None else 0,
            outs[0], padding[0], padding[1], padding[2], padding[3],
            stride[0], stride[1], nthreads, algorithm), name="C")
def convolution_inference_weight_transform(
        kernel, nthreads=1,
        algorithm=ConvolutionAlgorithm.AUTO):
    """Create an extern op to pre-transform a 4D convolution kernel with nnpack.

    The result has the layout expected as ``transformed_kernel`` by
    ``convolution_inference_without_weight_transform``.

    Parameters
    ----------
    kernel : Tensor
        kernel 4D tensor kernel[output_channels][input_channels][kernel_height]
        [kernel_width] of FP32 elements.
    nthreads : int
        Forwarded unchanged to the packed call.
    algorithm : int
        One of ConvolutionAlgorithm.WT_8x8 or ConvolutionAlgorithm.WT_8x8_FP16.
        The default (AUTO) is rejected by the assertion below, so callers
        must pass this explicitly.

    Returns
    -------
    output : Tensor
        output 4D tensor output[output_channels][input_channels][tile][tile]
        of FP32 elements.
    """
    assert algorithm in (
        ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16)
    output_channels, input_channels, _, _ = kernel.shape

    # Both accepted algorithms are 8x8-tile variants, hence the fixed tile size.
    transform_tile_size = 8
    return _api.extern(
        (output_channels, input_channels, transform_tile_size, transform_tile_size),
        [kernel],
        lambda ins, outs: _intrin.call_packed(
            "tvm.contrib.nnpack.convolution_inference_weight_transform",
            ins[0], outs[0], nthreads, algorithm), name="transform_kernel")
def
convolution_output
(
data
,
kernel
,
bias
,
padding
,
nthreads
=
1
):
"""Create an extern op to compute convolution of 4D tensor data and
...
...
@@ -144,4 +247,5 @@ def convolution_output(data, kernel, bias, padding, nthreads=1):
"tvm.contrib.nnpack.convolution_output"
,
ins
[
0
],
ins
[
1
],
ins
[
2
],
outs
[
0
],
padding
[
0
],
padding
[
1
],
padding
[
2
],
padding
[
3
],
nthreads
),
name
=
"C"
)
_init_api
(
"tvm.contrib.nnpack"
)
src/contrib/nnpack/convolution.cc
View file @
fc83c7f2
This diff is collapsed.
Click to expand it.
src/contrib/nnpack/nnpack_utils.cc
View file @
fc83c7f2
...
...
@@ -10,20 +10,30 @@ using namespace runtime;
typedef
dmlc
::
ThreadLocalStore
<
NNPackThreadLocalEntry
>
NNPackThreadLocalStore
;
NNPackThreadLocalEntry
*
NNPackThreadLocalEntry
::
ThreadLocal
()
{
return
NNPackThreadLocalStore
::
Get
();
}
bool
NNPackConfig
(
uint64_t
nthreads
)
{
NNPackThreadLocalEntry
*
entry
=
NNPackThreadLocalEntry
::
ThreadLocal
();
if
(
entry
->
threadpool
!=
NULL
&&
pthreadpool_get_threads_count
(
entry
->
threadpool
)
!=
nthreads
)
{
if
(
entry
->
threadpool
&&
pthreadpool_get_threads_count
(
entry
->
threadpool
)
==
nthreads
)
{
CHECK_NE
(
nthreads
,
1
);
return
true
;
}
if
(
entry
->
threadpool
)
{
pthreadpool_destroy
(
entry
->
threadpool
);
entry
->
threadpool
=
NULL
;
entry
->
threadpool
=
nullptr
;
}
if
(
entry
->
threadpool
==
NULL
)
{
entry
->
threadpool
=
pthreadpool_create
(
nthreads
);
if
(
nthreads
==
1
)
{
// a null threadpool means the function is invoked on the calling thread,
// which is the desired logic for nthreads == 1
CHECK
(
!
entry
->
threadpool
);
return
true
;
}
entry
->
threadpool
=
pthreadpool_create
(
nthreads
);
return
true
;
}
...
...
src/contrib/nnpack/nnpack_utils.h
View file @
fc83c7f2
...
...
@@ -15,7 +15,7 @@ namespace contrib {
using
namespace
runtime
;
struct
NNPackThreadLocalEntry
{
pthreadpool_t
threadpool
{
NULL
};
pthreadpool_t
threadpool
{
nullptr
};
static
NNPackThreadLocalEntry
*
ThreadLocal
();
};
...
...
tests/lint/pylintrc
View file @
fc83c7f2
...
...
@@ -290,10 +290,10 @@ variable-rgx=[a-z_][a-z0-9_]{2,30}$
variable-name-hint=[a-z_][a-z0-9_]{2,30}$
# Regular expression matching correct function names
function-rgx=[a-z_][a-z0-9_]{2,
30
}$
function-rgx=[a-z_][a-z0-9_]{2,
48
}$
# Naming hint for function names
function-name-hint=[a-z_][a-z0-9_]{2,
30
}$
function-name-hint=[a-z_][a-z0-9_]{2,
48
}$
# Regular expression matching correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$
...
...
tests/python/contrib/test_nnpack.py
View file @
fc83c7f2
...
...
@@ -100,7 +100,7 @@ def np_conv(na, nw, padding, stride=1):
return
nb
def
test_convolution_inference
():
BATCH
=
32
BATCH
=
8
IH
=
48
IW
=
48
IC
=
16
...
...
@@ -111,19 +111,17 @@ def test_convolution_inference():
OH
=
(
IH
+
2
*
PAD
-
K
)
+
1
OW
=
(
IW
+
2
*
PAD
-
K
)
+
1
dshape
=
(
IC
,
IH
,
IW
)
dshape
=
(
BATCH
,
IC
,
IH
,
IW
)
kshape
=
(
OC
,
IC
,
K
,
K
)
bshape
=
(
OC
,
)
oshape
=
(
OC
,
OH
,
OW
)
oshape
=
(
BATCH
,
OC
,
OH
,
OW
)
data
=
tvm
.
placeholder
(
dshape
,
name
=
'data'
)
kernel
=
tvm
.
placeholder
(
kshape
,
name
=
'kernel'
)
bias
=
tvm
.
placeholder
(
bshape
,
name
=
'bias'
)
output
=
nnpack
.
convolution_inference
(
data
,
kernel
,
bias
,
[
PAD
,
PAD
,
PAD
,
PAD
],
[
STRIDE
,
STRIDE
])
s
=
tvm
.
create_schedule
(
output
.
op
)
def
verify
(
target
=
"llvm"
):
def
verify
(
target
=
"llvm"
,
algorithm
=
nnpack
.
ConvolutionAlgorithm
.
AUTO
,
with_bias
=
True
):
if
not
tvm
.
module
.
enabled
(
target
):
print
(
"skip because
%
s is not enabled..."
%
target
)
return
...
...
@@ -131,6 +129,12 @@ def test_convolution_inference():
print
(
"skip because extern function is not available"
)
return
ctx
=
tvm
.
cpu
(
0
)
output
=
nnpack
.
convolution_inference
(
data
,
kernel
,
bias
if
with_bias
else
None
,
[
PAD
,
PAD
,
PAD
,
PAD
],
[
STRIDE
,
STRIDE
],
algorithm
=
algorithm
)
s
=
tvm
.
create_schedule
(
output
.
op
)
f
=
tvm
.
build
(
s
,
[
data
,
kernel
,
bias
,
output
],
target
)
na
=
np
.
random
.
uniform
(
size
=
dshape
)
.
astype
(
data
.
dtype
)
...
...
@@ -141,10 +145,77 @@ def test_convolution_inference():
tc
=
tvm
.
nd
.
array
(
nc
,
ctx
)
td
=
tvm
.
nd
.
array
(
np
.
zeros
(
oshape
,
dtype
=
output
.
dtype
),
ctx
)
f
(
ta
,
tb
,
tc
,
td
)
nd
=
np_conv
(
np
.
reshape
(
na
,
(
1
,
IC
,
IH
,
IW
)),
nb
,
PAD
,
STRIDE
)
nd
=
np_conv
(
np
.
reshape
(
na
,
(
BATCH
,
IC
,
IH
,
IW
)),
nb
,
PAD
,
STRIDE
)
+
nc
.
reshape
(
1
,
bshape
[
0
],
1
,
1
)
tvm
.
testing
.
assert_allclose
(
td
.
asnumpy
(),
nd
.
reshape
(
IC
,
IH
,
IW
),
rtol
=
1e-5
)
verify
()
td
.
asnumpy
(),
nd
.
reshape
(
BATCH
,
IC
,
IH
,
IW
),
rtol
=
1e-5
)
for
algorithm
in
[
nnpack
.
ConvolutionAlgorithm
.
AUTO
,
nnpack
.
ConvolutionAlgorithm
.
FFT_8x8
,
nnpack
.
ConvolutionAlgorithm
.
FFT_16x16
,
nnpack
.
ConvolutionAlgorithm
.
WT_8x8
,
nnpack
.
ConvolutionAlgorithm
.
IMPLICIT_GEMM
,
nnpack
.
ConvolutionAlgorithm
.
WT_8x8_FP16
,
]:
for
with_bias
in
[
True
,
False
]:
verify
(
algorithm
=
algorithm
,
with_bias
=
with_bias
)
def test_convolution_inference_without_weight_transform():
    """Compare the pre-transformed-kernel NNPACK convolution with np_conv.

    The kernel is first passed through
    ``nnpack.convolution_inference_weight_transform`` and the result is fed
    to ``nnpack.convolution_inference_without_weight_transform``; the output
    is checked against the NumPy reference, with and without bias.
    """
    BATCH = 6
    IH = 48
    IW = 48
    IC = 16
    OC = 16
    K = 3
    PAD = 1
    STRIDE = 1

    OH = (IH + 2 * PAD - K) + 1
    OW = (IW + 2 * PAD - K) + 1
    dshape = (BATCH, IC, IH, IW)
    kshape = (OC, IC, K, K)
    bshape = (OC, )
    oshape = (BATCH, OC, OH, OW)

    data = tvm.placeholder(dshape, name='data')
    kernel = tvm.placeholder(kshape, name='kernel')
    bias = tvm.placeholder(bshape, name='bias')

    def verify(target="llvm",
               algorithm=nnpack.ConvolutionAlgorithm.AUTO,
               with_bias=True):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        # Probe the packed function this test actually exercises.
        if not tvm.get_global_func(
                "tvm.contrib.nnpack.convolution_inference_without_weight_transform",
                True):
            print("skip because extern function is not available")
            return

        ctx = tvm.cpu(0)
        transformed_kernel = nnpack.convolution_inference_weight_transform(
            kernel, algorithm=algorithm)
        output = nnpack.convolution_inference_without_weight_transform(
            data, transformed_kernel, bias if with_bias else None,
            [PAD, PAD, PAD, PAD], [STRIDE, STRIDE],
            algorithm=algorithm)

        s = tvm.create_schedule(output.op)

        f = tvm.build(s, [data, kernel, bias, output], target)

        na = np.random.uniform(size=dshape).astype(data.dtype)
        nb = np.random.uniform(size=kshape).astype(kernel.dtype)
        # Without bias the reference adds zeros, so one formula covers both cases.
        nc = np.random.uniform(size=bshape).astype(bias.dtype) if with_bias \
            else np.zeros(bshape, dtype=bias.dtype)
        ta = tvm.nd.array(na, ctx)
        tb = tvm.nd.array(nb, ctx)
        tc = tvm.nd.array(nc, ctx)
        td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), ctx)
        f(ta, tb, tc, td)
        nd = np_conv(np.reshape(na, dshape), nb, PAD, STRIDE) + \
            nc.reshape(1, bshape[0], 1, 1)
        # Compare against the output shape, not the input shape; the two only
        # coincide here because IC == OC and the convolution preserves H/W.
        tvm.testing.assert_allclose(
            td.asnumpy(), nd.reshape(oshape), rtol=1e-5)

    for algorithm in [nnpack.ConvolutionAlgorithm.WT_8x8]:
        for with_bias in [True, False]:
            verify(algorithm=algorithm, with_bias=with_bias)
def
test_convolution_output
():
BATCH
=
32
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment