Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
f2b91392
Commit
f2b91392
authored
7 years ago
by
Tianqi Chen
Committed by
GitHub
7 years ago
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Support rank-0 tensor (#687)
* Support rank-0 tensor * fix lint
parent
df4962e2
Hide whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
143 additions
and
57 deletions
+143
-57
include/tvm/buffer.h
+5
-0
include/tvm/packed_func_ext.h
+4
-0
include/tvm/tensor.h
+1
-1
python/tvm/_ffi/ndarray.py
+4
-3
python/tvm/api.py
+0
-1
python/tvm/tensor.py
+7
-1
src/arithmetic/compute_expr.h
+9
-3
src/lang/buffer.cc
+19
-18
src/pass/arg_binder.cc
+11
-9
src/pass/inject_double_buffer.cc
+2
-1
src/pass/inject_virtual_thread.cc
+2
-1
src/pass/storage_flatten.cc
+11
-4
src/pass/storage_rewrite.cc
+1
-1
src/runtime/c_runtime_api.cc
+7
-3
src/runtime/graph/graph_runtime.cc
+4
-2
src/schedule/schedule_dataflow_rewrite.cc
+3
-3
tests/python/unittest/test_codegen_device.py
+5
-4
tests/python/unittest/test_codegen_llvm.py
+25
-0
tests/python/unittest/test_lang_tensor.py
+13
-0
tests/python/unittest/test_runtime_packed_func.py
+8
-0
topi/python/topi/nn/dense.py
+2
-2
No files found.
include/tvm/buffer.h
View file @
f2b91392
...
...
@@ -124,6 +124,11 @@ class BufferNode : public Node {
v
->
Visit
(
"offset_factor"
,
&
offset_factor
);
}
/*! \return preferred index type for this buffer node */
Type
DefaultIndexType
()
const
{
return
shape
.
size
()
!=
0
?
shape
[
0
].
type
()
:
Int
(
32
);
}
// User can specify data_alignment and offset_factor to be 0
// A default value will be picked.
TVM_DLL
static
Buffer
make
(
Var
ptr
,
...
...
This diff is collapsed.
Click to expand it.
include/tvm/packed_func_ext.h
View file @
f2b91392
...
...
@@ -14,6 +14,7 @@
#include "./base.h"
#include "./expr.h"
#include "./tensor.h"
#include "./runtime/packed_func.h"
namespace
tvm
{
...
...
@@ -116,6 +117,9 @@ inline TVMArgValue::operator Halide::Expr() const {
if
(
sptr
->
is_type
<
IterVarNode
>
())
{
return
IterVar
(
sptr
)
->
var
;
}
if
(
sptr
->
is_type
<
TensorNode
>
())
{
return
Tensor
(
sptr
)();
}
CHECK
(
NodeTypeChecker
<
Expr
>::
Check
(
sptr
.
get
()))
<<
"Expected type "
<<
NodeTypeName
<
Expr
>
()
<<
" but get "
<<
sptr
->
type_key
();
...
...
This diff is collapsed.
Click to expand it.
include/tvm/tensor.h
View file @
f2b91392
...
...
@@ -188,7 +188,7 @@ inline bool Tensor::operator==(const Tensor& other) const {
#define DEFINE_OVERLOAD_SLICE_UNARY_OP(Op) \
inline Expr operator Op (const Tensor::Slice& a) { \
return Op a.operator Expr() ; \
}
}
\
#define DEFINE_OVERLOAD_SLICE_BINARY_OP(Op) \
template<typename T> \
...
...
This diff is collapsed.
Click to expand it.
python/tvm/_ffi/ndarray.py
View file @
f2b91392
...
...
@@ -177,13 +177,14 @@ class NDArrayBase(_NDArrayBase):
shape
=
shape
+
(
t
.
lanes
,)
t
.
lanes
=
1
dtype
=
str
(
t
)
source_array
=
np
.
ascontiguousarray
(
source_array
,
dtype
=
dtype
)
if
source_array
.
shape
!=
shape
:
raise
ValueError
(
"array shape do not match the shape of NDArray {0} vs {1}"
.
format
(
source_array
.
shape
,
shape
))
source_array
=
np
.
ascontiguousarray
(
source_array
,
dtype
=
dtype
)
assert
source_array
.
flags
[
'C_CONTIGUOUS'
]
data
=
source_array
.
ctypes
.
data_as
(
ctypes
.
c_void_p
)
nbytes
=
ctypes
.
c_size_t
(
np
.
prod
(
source_array
.
shape
)
*
source_array
.
dtype
.
itemsize
)
nbytes
=
ctypes
.
c_size_t
(
source_array
.
size
*
source_array
.
dtype
.
itemsize
)
check_call
(
_LIB
.
TVMArrayCopyFromBytes
(
self
.
handle
,
data
,
nbytes
))
return
self
...
...
@@ -212,7 +213,7 @@ class NDArrayBase(_NDArrayBase):
np_arr
=
np
.
empty
(
shape
,
dtype
=
dtype
)
assert
np_arr
.
flags
[
'C_CONTIGUOUS'
]
data
=
np_arr
.
ctypes
.
data_as
(
ctypes
.
c_void_p
)
nbytes
=
ctypes
.
c_size_t
(
np
.
prod
(
np_arr
.
shape
)
*
np_arr
.
dtype
.
itemsize
)
nbytes
=
ctypes
.
c_size_t
(
np
_arr
.
size
*
np_arr
.
dtype
.
itemsize
)
check_call
(
_LIB
.
TVMArrayCopyToBytes
(
self
.
handle
,
data
,
nbytes
))
return
np_arr
...
...
This diff is collapsed.
Click to expand it.
python/tvm/api.py
View file @
f2b91392
...
...
@@ -462,7 +462,6 @@ def decl_buffer(shape,
elem_offset
=
var
(
'
%
s_elem_offset'
%
name
,
shape
[
0
]
.
dtype
)
if
data
is
None
:
data
=
var
(
name
,
"handle"
)
return
_api_internal
.
_Buffer
(
data
,
dtype
,
shape
,
strides
,
elem_offset
,
name
,
scope
,
data_alignment
,
offset_factor
)
...
...
This diff is collapsed.
Click to expand it.
python/tvm/tensor.py
View file @
f2b91392
...
...
@@ -32,7 +32,7 @@ class TensorSlice(NodeGeneric, _expr.ExprOp):
itervar_cls
=
None
@register_node
class
Tensor
(
NodeBase
):
class
Tensor
(
NodeBase
,
_expr
.
ExprOp
):
"""Tensor object, to construct, see function.Tensor"""
def
__call__
(
self
,
*
indices
):
ndim
=
self
.
ndim
...
...
@@ -60,7 +60,13 @@ class Tensor(NodeBase):
def
__eq__
(
self
,
other
):
if
not
isinstance
(
other
,
Tensor
):
if
isinstance
(
other
,
_expr
.
ExprOp
):
return
_expr
.
EqualOp
(
self
,
other
)
return
False
if
self
.
ndim
==
0
and
other
.
ndim
==
0
:
raise
ValueError
(
"Equal == comparison among rank-0 tensor is ambiguous, "
"use Tensor.equal for content expression equvalence, "
"use Tensor.same_as for exact reference comparison"
)
return
_api_internal
.
_TensorEqual
(
self
,
other
)
@property
...
...
This diff is collapsed.
Click to expand it.
src/arithmetic/compute_expr.h
View file @
f2b91392
...
...
@@ -33,11 +33,14 @@ inline Expr ComputeExpr(Expr lhs, Expr rhs) {
/*!
* \brief Compute a reduction with Op
* \param values The input values.
* \param empty_value The value to return if values is empty; can be Expr(),
* which will cause an error to be raised.
* \tparam Op The computation operator
* \return The result.
*/
template
<
typename
Op
>
inline
Expr
ComputeReduce
(
const
Array
<
Expr
>&
values
);
inline
Expr
ComputeReduce
(
const
Array
<
Expr
>&
values
,
Expr
empty_value
);
template
<
typename
T
>
inline
bool
GetConst
(
Expr
e
,
T
*
out
);
...
...
@@ -139,8 +142,11 @@ inline Expr ComputeExpr<ir::Min>(Expr a, Expr b) {
}
template
<
typename
Op
>
inline
Expr
ComputeReduce
(
const
Array
<
Expr
>&
values
)
{
CHECK_NE
(
values
.
size
(),
0U
);
inline
Expr
ComputeReduce
(
const
Array
<
Expr
>&
values
,
Expr
empty_value
)
{
if
(
values
.
size
()
==
0U
)
{
CHECK
(
empty_value
.
defined
());
return
empty_value
;
}
Expr
res
=
values
[
0
];
for
(
size_t
i
=
1
;
i
<
values
.
size
();
++
i
)
{
res
=
ComputeExpr
<
Op
>
(
res
,
values
[
i
]);
...
...
This diff is collapsed.
Click to expand it.
src/lang/buffer.cc
View file @
f2b91392
...
...
@@ -11,15 +11,6 @@
namespace
tvm
{
Array
<
Expr
>
GetStrides
(
Array
<
Expr
>
shape
)
{
CHECK_NE
(
shape
.
size
(),
0U
);
std
::
vector
<
Expr
>
vec
{
make_const
(
shape
[
0
].
type
(),
1
)};
for
(
size_t
i
=
shape
.
size
()
-
1
;
i
!=
0
;
--
i
)
{
vec
.
push_back
(
shape
[
i
-
1
]
*
vec
.
back
());
}
return
Array
<
Expr
>
(
vec
.
rbegin
(),
vec
.
rend
());
}
Array
<
Expr
>
SimplifyArray
(
Array
<
Expr
>
array
)
{
for
(
size_t
i
=
0
;
i
<
array
.
size
();
++
i
)
{
array
.
Set
(
i
,
ir
::
Simplify
(
array
[
i
]));
...
...
@@ -235,10 +226,12 @@ inline Expr ElemOffset(const BufferNode* n, Array<Expr> index) {
Expr
base
=
n
->
elem_offset
;
if
(
n
->
strides
.
size
()
==
0
)
{
CHECK_EQ
(
n
->
shape
.
size
(),
index
.
size
());
if
(
is_zero
(
base
))
{
base
=
index
[
0
];
}
else
{
base
=
base
+
index
[
0
];
if
(
n
->
shape
.
size
()
!=
0
)
{
if
(
is_zero
(
base
))
{
base
=
index
[
0
];
}
else
{
base
=
base
+
index
[
0
];
}
}
base
=
MergeMulMod
(
base
);
for
(
size_t
i
=
1
;
i
<
index
.
size
();
++
i
)
{
...
...
@@ -294,9 +287,10 @@ Stmt Buffer::vstore(Array<Expr> begin, Expr value) const {
Buffer
Buffer
::
MakeStrideView
()
const
{
if
((
*
this
)
->
strides
.
size
()
!=
0
)
return
*
this
;
if
((
*
this
)
->
shape
.
size
()
==
0
)
return
*
this
;
std
::
vector
<
Expr
>
temp
;
auto
n
=
std
::
make_shared
<
BufferNode
>
(
*
operator
->
());
Expr
acc
=
make_const
(
n
->
shape
[
0
].
t
ype
(),
1
);
Expr
acc
=
make_const
(
n
->
DefaultIndexT
ype
(),
1
);
for
(
size_t
i
=
n
->
shape
.
size
();
i
!=
0
;
--
i
)
{
temp
.
push_back
(
acc
);
acc
=
acc
*
n
->
shape
[
i
-
1
];
...
...
@@ -344,9 +338,16 @@ Buffer Buffer::MakeSlice(Array<Expr> begins, Array<Expr> extents) const {
Expr
Buffer
::
access_ptr
(
int
access_mask
,
Type
ptr_type
,
int
content_lanes
)
const
{
const
BufferNode
*
self
=
operator
->
();
Expr
e_dtype
;
Expr
extent
=
(
self
->
strides
.
size
()
==
self
->
shape
.
size
()
?
arith
::
ComputeExpr
<
ir
::
Mul
>
(
self
->
strides
[
0
],
self
->
shape
[
0
])
:
arith
::
ComputeReduce
<
ir
::
Mul
>
(
self
->
shape
));
Expr
extent
;
if
(
self
->
shape
.
size
()
==
0
)
{
extent
=
make_const
(
self
->
DefaultIndexType
(),
1
);
}
else
if
(
self
->
strides
.
size
()
==
self
->
shape
.
size
())
{
int
highest_dim
=
0
;
extent
=
arith
::
ComputeExpr
<
ir
::
Mul
>
(
self
->
strides
[
highest_dim
],
self
->
shape
[
highest_dim
]);
}
else
{
extent
=
arith
::
ComputeReduce
<
ir
::
Mul
>
(
self
->
shape
,
Expr
());
}
Expr
elem_offset
=
self
->
elem_offset
;
if
(
content_lanes
>
1
)
{
e_dtype
=
make_zero
(
self
->
dtype
.
with_lanes
(
content_lanes
));
...
...
@@ -383,7 +384,7 @@ Buffer BufferNode::make(Var data,
}
n
->
scope
=
std
::
move
(
scope
);
if
(
!
elem_offset
.
defined
())
{
elem_offset
=
make_const
(
n
->
shape
[
0
].
t
ype
(),
0
);
elem_offset
=
make_const
(
n
->
DefaultIndexT
ype
(),
0
);
}
if
(
data_alignment
<=
0
)
{
data_alignment
=
runtime
::
kAllocAlignment
;
...
...
This diff is collapsed.
Click to expand it.
src/pass/arg_binder.cc
View file @
f2b91392
...
...
@@ -196,7 +196,7 @@ void ArgBinder::BindDLTensor(const Buffer& buffer,
nop
));
if
(
buffer
->
strides
.
size
()
==
0
)
{
// Assert the buffer is compact
Type
stype
=
buffer
->
shape
[
0
].
t
ype
();
Type
stype
=
buffer
->
DefaultIndexT
ype
();
Expr
expect_stride
=
make_const
(
stype
,
1
);
Array
<
Expr
>
conds
;
for
(
size_t
i
=
buffer
->
shape
.
size
();
i
!=
0
;
--
i
)
{
...
...
@@ -211,14 +211,16 @@ void ArgBinder::BindDLTensor(const Buffer& buffer,
std
::
ostringstream
stride_err_msg
;
stride_err_msg
<<
arg_name
<<
".strides:"
<<
" expected to be compact array"
;
Stmt
check
=
AssertStmt
::
make
(
arith
::
ComputeReduce
<
ir
::
And
>
(
conds
),
stride_err_msg
.
str
(),
Evaluate
::
make
(
0
));
Expr
is_null
=
Call
::
make
(
Bool
(
1
),
intrinsic
::
tvm_handle_is_null
,
{
v_strides
},
Call
::
PureIntrinsic
);
check
=
IfThenElse
::
make
(
Not
::
make
(
is_null
),
check
,
Stmt
());
init_nest_
.
emplace_back
(
Block
::
make
(
check
,
Evaluate
::
make
(
0
)));
if
(
conds
.
size
()
!=
0
)
{
Stmt
check
=
AssertStmt
::
make
(
arith
::
ComputeReduce
<
ir
::
And
>
(
conds
,
Expr
()),
stride_err_msg
.
str
(),
Evaluate
::
make
(
0
));
Expr
is_null
=
Call
::
make
(
Bool
(
1
),
intrinsic
::
tvm_handle_is_null
,
{
v_strides
},
Call
::
PureIntrinsic
);
check
=
IfThenElse
::
make
(
Not
::
make
(
is_null
),
check
,
Stmt
());
init_nest_
.
emplace_back
(
Block
::
make
(
check
,
Evaluate
::
make
(
0
)));
}
}
else
{
for
(
size_t
k
=
0
;
k
<
buffer
->
strides
.
size
();
++
k
)
{
std
::
ostringstream
field_name
;
...
...
This diff is collapsed.
Click to expand it.
src/pass/inject_double_buffer.cc
View file @
f2b91392
...
...
@@ -81,7 +81,8 @@ class DoubleBufferInjector : public IRMutator {
Stmt
Mutate_
(
const
Allocate
*
op
,
const
Stmt
&
s
)
final
{
auto
it
=
dbuffer_info_
.
find
(
op
->
buffer_var
.
get
());
if
(
it
!=
dbuffer_info_
.
end
())
{
it
->
second
.
stride
=
arith
::
ComputeReduce
<
Mul
>
(
op
->
extents
)
*
op
->
type
.
lanes
();
it
->
second
.
stride
=
arith
::
ComputeReduce
<
Mul
>
(
op
->
extents
,
Expr
())
*
op
->
type
.
lanes
();
Stmt
stmt
=
IRMutator
::
Mutate_
(
op
,
s
);
op
=
stmt
.
as
<
Allocate
>
();
Array
<
Expr
>
new_extents
{
make_const
(
op
->
extents
[
0
].
type
(),
2
)};
...
...
This diff is collapsed.
Click to expand it.
src/pass/inject_virtual_thread.cc
View file @
f2b91392
...
...
@@ -376,7 +376,8 @@ class VTInjector : public IRMutator {
// always rewrite if sharing is not allowed.
if
(
touched_var_
.
count
(
op
->
buffer_var
.
get
())
||
!
allow_share_
)
{
// place v on highest dimension.
Expr
stride
=
arith
::
ComputeReduce
<
Mul
>
(
op
->
extents
)
*
op
->
type
.
lanes
();
Expr
stride
=
arith
::
ComputeReduce
<
Mul
>
(
op
->
extents
,
Expr
())
*
op
->
type
.
lanes
();
Array
<
Expr
>
other
;
other
.
push_back
(
make_const
(
op
->
extents
[
0
].
type
(),
num_threads_
));
for
(
Expr
e
:
extents
)
{
...
...
This diff is collapsed.
Click to expand it.
src/pass/storage_flatten.cc
View file @
f2b91392
...
...
@@ -147,10 +147,11 @@ class StorageFlattener : public IRMutator {
}
}
Array
<
Expr
>
strides
;
if
(
dim_align_
.
count
(
key
)
!=
0
)
{
if
(
dim_align_
.
count
(
key
)
!=
0
&&
shape
.
size
()
!=
0
)
{
std
::
vector
<
Expr
>
rstrides
;
const
std
::
vector
<
DimAlignInfo
>&
avec
=
dim_align_
[
key
];
Expr
stride
=
make_const
(
shape
[
0
].
type
(),
1
);
int
first_dim
=
0
;
Expr
stride
=
make_const
(
shape
[
first_dim
].
type
(),
1
);
for
(
size_t
i
=
shape
.
size
();
i
!=
0
;
--
i
)
{
size_t
dim
=
i
-
1
;
if
(
dim
<
avec
.
size
()
&&
avec
[
dim
].
align_factor
!=
0
)
{
...
...
@@ -164,6 +165,7 @@ class StorageFlattener : public IRMutator {
}
strides
=
Array
<
Expr
>
(
rstrides
.
rbegin
(),
rstrides
.
rend
());
}
e
.
buffer
=
BufferNode
::
make
(
Var
(
key
.
GetName
(),
Handle
()),
op
->
type
,
shape
,
strides
,
Expr
(),
...
...
@@ -176,13 +178,18 @@ class StorageFlattener : public IRMutator {
Stmt
ret
;
if
(
strides
.
size
()
!=
0
)
{
int
first_dim
=
0
;
ret
=
Allocate
::
make
(
e
.
buffer
->
data
,
e
.
buffer
->
dtype
,
{
arith
::
ComputeExpr
<
Mul
>
(
e
.
buffer
->
strides
[
0
],
e
.
buffer
->
shape
[
0
])},
{
arith
::
ComputeExpr
<
Mul
>
(
e
.
buffer
->
strides
[
first_dim
],
e
.
buffer
->
shape
[
first_dim
])},
make_const
(
Bool
(
e
.
buffer
->
dtype
.
lanes
()),
true
),
body
);
}
else
{
shape
=
e
.
buffer
->
shape
;
if
(
shape
.
size
()
==
0
)
{
shape
.
push_back
(
make_const
(
Int
(
32
),
1
));
}
ret
=
Allocate
::
make
(
e
.
buffer
->
data
,
e
.
buffer
->
dtype
,
e
.
buffer
->
shape
,
e
.
buffer
->
data
,
e
.
buffer
->
dtype
,
shape
,
make_const
(
Bool
(
e
.
buffer
->
dtype
.
lanes
()),
true
),
body
);
}
ret
=
AttrStmt
::
make
(
...
...
This diff is collapsed.
Click to expand it.
src/pass/storage_rewrite.cc
View file @
f2b91392
...
...
@@ -405,7 +405,7 @@ class StoragePlanRewriter : public IRMutator {
// Build a merged allocation
Expr
combo_size
;
for
(
const
Allocate
*
op
:
e
->
allocs
)
{
Expr
sz
=
arith
::
ComputeReduce
<
Mul
>
(
op
->
extents
);
Expr
sz
=
arith
::
ComputeReduce
<
Mul
>
(
op
->
extents
,
make_const
(
Int
(
32
),
1
)
);
if
(
alloc_type
.
lanes
()
!=
op
->
type
.
lanes
())
{
sz
=
(
sz
*
make_const
(
sz
.
type
(),
op
->
type
.
lanes
())
+
make_const
(
sz
.
type
(),
alloc_type
.
lanes
()
-
1
))
/
...
...
This diff is collapsed.
Click to expand it.
src/runtime/c_runtime_api.cc
View file @
f2b91392
...
...
@@ -352,9 +352,13 @@ int TVMArrayAlloc(const tvm_index_t* shape,
arr
->
dtype
.
code
=
static_cast
<
uint8_t
>
(
dtype_code
);
arr
->
dtype
.
bits
=
static_cast
<
uint8_t
>
(
dtype_bits
);
arr
->
dtype
.
lanes
=
static_cast
<
uint16_t
>
(
dtype_lanes
);
tvm_index_t
*
shape_copy
=
new
tvm_index_t
[
ndim
];
std
::
copy
(
shape
,
shape
+
ndim
,
shape_copy
);
arr
->
shape
=
shape_copy
;
if
(
ndim
!=
0
)
{
tvm_index_t
*
shape_copy
=
new
tvm_index_t
[
ndim
];
std
::
copy
(
shape
,
shape
+
ndim
,
shape_copy
);
arr
->
shape
=
shape_copy
;
}
else
{
arr
->
shape
=
nullptr
;
}
// ctx
arr
->
ctx
.
device_type
=
static_cast
<
DLDeviceType
>
(
device_type
);
arr
->
ctx
.
device_id
=
device_id
;
...
...
This diff is collapsed.
Click to expand it.
src/runtime/graph/graph_runtime.cc
View file @
f2b91392
...
...
@@ -370,8 +370,10 @@ void GraphRuntime::LoadDLTensor(dmlc::Stream* strm, DLTensor* dst) {
CHECK
(
strm
->
Read
(
&
tensor
.
dtype
,
sizeof
(
tensor
.
dtype
)))
<<
"Invalid DLTensor file format"
;
std
::
vector
<
int64_t
>
shape
(
tensor
.
ndim
);
CHECK
(
strm
->
Read
(
&
shape
[
0
],
sizeof
(
int64_t
)
*
tensor
.
ndim
))
<<
"Invalid DLTensor file format"
;
if
(
tensor
.
ndim
!=
0
)
{
CHECK
(
strm
->
Read
(
&
shape
[
0
],
sizeof
(
int64_t
)
*
tensor
.
ndim
))
<<
"Invalid DLTensor file format"
;
}
CHECK_EQ
(
tensor
.
ndim
,
dst
->
ndim
)
<<
"param dimension mismatch"
;
CHECK
(
tensor
.
dtype
.
bits
==
dst
->
dtype
.
bits
&&
tensor
.
dtype
.
code
==
dst
->
dtype
.
code
&&
...
...
This diff is collapsed.
Click to expand it.
src/schedule/schedule_dataflow_rewrite.cc
View file @
f2b91392
...
...
@@ -47,10 +47,10 @@ Expr InjectPredicate(const Array<Expr>& predicates,
const
Reduce
*
reduce
=
body
.
as
<
Reduce
>
();
if
(
reduce
)
{
std
::
shared_ptr
<
Reduce
>
n
=
std
::
make_shared
<
Reduce
>
(
*
reduce
);
n
->
condition
=
n
->
condition
&&
arith
::
ComputeReduce
<
ir
::
And
>
(
predicates
);
n
->
condition
=
n
->
condition
&&
arith
::
ComputeReduce
<
ir
::
And
>
(
predicates
,
Expr
()
);
return
Expr
(
n
);
}
return
Select
::
make
(
arith
::
ComputeReduce
<
ir
::
And
>
(
predicates
),
return
Select
::
make
(
arith
::
ComputeReduce
<
ir
::
And
>
(
predicates
,
Expr
()
),
body
,
make_zero
(
body
.
type
()));
}
...
...
@@ -467,7 +467,7 @@ Array<Tensor> Schedule::rfactor(const Tensor& tensor,
const
Reduce
*
reduce
=
compute_op
->
body
[
idx
].
as
<
Reduce
>
();
CHECK
(
reduce
)
<<
"Can only rfactor non-inline reductions"
;
predicates
.
push_back
(
reduce
->
condition
);
Expr
predicate
=
arith
::
ComputeReduce
<
ir
::
And
>
(
predicates
);
Expr
predicate
=
arith
::
ComputeReduce
<
ir
::
And
>
(
predicates
,
Expr
()
);
std
::
unordered_map
<
const
Variable
*
,
Expr
>
vsub
;
...
...
This diff is collapsed.
Click to expand it.
tests/python/unittest/test_codegen_device.py
View file @
f2b91392
...
...
@@ -5,8 +5,8 @@ import numpy as np
def
test_add_pipeline
():
n
=
tvm
.
var
(
'n'
)
A
=
tvm
.
placeholder
((
n
,),
name
=
'A'
)
B
=
tvm
.
placeholder
((
n
,
),
name
=
'B'
)
C
=
tvm
.
compute
(
A
.
shape
,
lambda
*
i
:
A
(
*
i
)
+
B
(
*
i
),
name
=
'C'
)
B
=
tvm
.
placeholder
((),
name
=
'B'
)
C
=
tvm
.
compute
(
A
.
shape
,
lambda
*
i
:
A
(
*
i
)
+
B
(),
name
=
'C'
)
D
=
tvm
.
compute
(
A
.
shape
,
lambda
*
i
:
C
(
*
i
)
+
1
,
name
=
'D'
)
s
=
tvm
.
create_schedule
(
D
.
op
)
...
...
@@ -48,7 +48,7 @@ def test_add_pipeline():
# launch the kernel.
n
=
1027
a
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
n
)
.
astype
(
Ab
.
dtype
),
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
n
)
.
astype
(
Bb
.
dtype
),
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
()
)
.
astype
(
Bb
.
dtype
),
ctx
)
d
=
tvm
.
nd
.
array
(
np
.
zeros
(
n
,
dtype
=
Db
.
dtype
),
ctx
)
f
(
a
,
b
,
d
)
np
.
testing
.
assert_allclose
(
...
...
@@ -72,7 +72,7 @@ def test_add_pipeline():
# launch the kernel.
n
=
1027
a
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
n
)
.
astype
(
Ab
.
dtype
),
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
n
)
.
astype
(
Bb
.
dtype
),
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
()
)
.
astype
(
Bb
.
dtype
),
ctx
)
d
=
tvm
.
nd
.
array
(
np
.
zeros
(
n
,
dtype
=
Db
.
dtype
),
ctx
)
f
(
a
,
b
,
d
)
np
.
testing
.
assert_allclose
(
...
...
@@ -84,5 +84,6 @@ def test_add_pipeline():
check_target
(
"nvptx"
,
host
=
"llvm"
)
check_target
(
"rocm"
,
host
=
"llvm"
)
if
__name__
==
"__main__"
:
test_add_pipeline
()
This diff is collapsed.
Click to expand it.
tests/python/unittest/test_codegen_llvm.py
View file @
f2b91392
...
...
@@ -273,7 +273,32 @@ def test_llvm_bool():
check_llvm
(
64
)
def
test_rank_zero
():
def
check_llvm
(
n
):
if
not
tvm
.
module
.
enabled
(
"llvm"
):
return
A
=
tvm
.
placeholder
((
n
,
),
name
=
'A'
)
scale
=
tvm
.
placeholder
((),
name
=
'scale'
)
k
=
tvm
.
reduce_axis
((
0
,
n
),
name
=
"k"
)
C
=
tvm
.
compute
((),
lambda
:
tvm
.
sum
(
A
[
k
]
*
scale
,
axis
=
k
),
name
=
"C"
)
D
=
tvm
.
compute
((),
lambda
:
C
+
1
)
s
=
tvm
.
create_schedule
(
D
.
op
)
# build and invoke the kernel.
f
=
tvm
.
build
(
s
,
[
A
,
scale
,
D
],
"llvm"
)
ctx
=
tvm
.
cpu
(
0
)
# launch the kernel.
a
=
tvm
.
nd
.
array
(
np
.
random
.
randint
(
0
,
2
,
size
=
(
n
,))
.
astype
(
A
.
dtype
),
ctx
)
sc
=
tvm
.
nd
.
array
(
np
.
random
.
randint
(
0
,
2
,
size
=
())
.
astype
(
scale
.
dtype
),
ctx
)
d
=
tvm
.
nd
.
empty
((),
D
.
dtype
,
ctx
)
f
(
a
,
sc
,
d
)
d_np
=
np
.
sum
(
a
.
asnumpy
())
*
sc
.
asnumpy
()
+
1
np
.
testing
.
assert_allclose
(
d
.
asnumpy
(),
d_np
)
check_llvm
(
64
)
if
__name__
==
"__main__"
:
test_rank_zero
()
test_llvm_bool
()
test_llvm_persist_parallel
()
test_llvm_select
()
...
...
This diff is collapsed.
Click to expand it.
tests/python/unittest/test_lang_tensor.py
View file @
f2b91392
...
...
@@ -19,6 +19,17 @@ def test_tensor():
assert
(
T
[
0
][
0
][
0
]
.
astype
(
'float16'
)
.
dtype
==
'float16'
)
def
test_rank_zero
():
m
=
tvm
.
var
(
'm'
)
A
=
tvm
.
placeholder
((
m
,),
name
=
'A'
)
scale
=
tvm
.
placeholder
((),
name
=
's'
)
k
=
tvm
.
reduce_axis
((
0
,
m
),
name
=
"k"
)
T
=
tvm
.
compute
((),
lambda
:
tvm
.
sum
(
A
[
k
]
*
scale
(),
axis
=
k
))
print
(
T
)
print
(
T
.
op
.
body
)
assert
(
tuple
(
T
.
shape
)
==
())
def
test_conv1d
():
n
=
tvm
.
var
(
'n'
)
A
=
tvm
.
placeholder
((
n
+
2
),
name
=
'A'
)
...
...
@@ -173,7 +184,9 @@ def test_tensor_inputs():
y
=
tvm
.
compute
(
x
.
shape
,
lambda
i
:
x
[
i
]
+
x
[
i
])
assert
tuple
(
y
.
op
.
input_tensors
)
==
(
x
,)
if
__name__
==
"__main__"
:
test_rank_zero
()
test_tensor_inputs
()
test_tensor_reduce_multi_axis
()
test_conv1d
()
...
...
This diff is collapsed.
Click to expand it.
tests/python/unittest/test_runtime_packed_func.py
View file @
f2b91392
...
...
@@ -63,7 +63,15 @@ def test_byte_array():
f
(
a
)
def
test_empty_array
():
def
myfunc
(
ss
):
assert
tuple
(
ss
)
==
()
x
=
tvm
.
convert
(())
tvm
.
convert
(
myfunc
)(
x
)
if
__name__
==
"__main__"
:
test_empty_array
()
test_get_global
()
test_get_callback_with_node
()
test_convert
()
...
...
This diff is collapsed.
Click to expand it.
topi/python/topi/nn/dense.py
View file @
f2b91392
...
...
@@ -25,7 +25,7 @@ def dense(data, weight, bias=None):
"""
assert
len
(
data
.
shape
)
==
2
and
len
(
weight
.
shape
)
==
2
,
\
"only support 2-dim dense"
if
bias
:
if
bias
is
not
None
:
assert
len
(
bias
.
shape
)
==
1
batch
,
in_dim
=
data
.
shape
out_dim
,
_
=
weight
.
shape
...
...
@@ -33,7 +33,7 @@ def dense(data, weight, bias=None):
matmul
=
tvm
.
compute
((
batch
,
out_dim
),
\
lambda
i
,
j
:
tvm
.
sum
(
data
[
i
,
k
]
*
weight
[
j
,
k
],
axis
=
k
),
\
tag
=
'dense'
)
if
bias
:
if
bias
is
not
None
:
matmul
=
tvm
.
compute
((
batch
,
out_dim
),
\
lambda
i
,
j
:
matmul
[
i
,
j
]
+
bias
[
j
],
\
tag
=
tag
.
BROADCAST
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment