Commit 2c512ca7 (wenyuanbo/tic)
Authored Mar 02, 2017 by Tianqi Chen; committed by GitHub on Mar 02, 2017
[LLVM] Vectorized load/store (#60)
parent 2111bbf3
Showing 7 changed files with 396 additions and 39 deletions (+396 -39)
src/arithmetic/compute_expr.h                 +1   -1
src/arithmetic/modular.cc                     +10  -1
src/arithmetic/modular.h                      +8   -0
src/codegen/llvm/codegen_llvm.cc              +290 -30
src/codegen/llvm/codegen_llvm.h               +22  -1
src/pass/vectorize_loop.cc                    +3   -1
tests/python/unittest/test_codegen_llvm.py    +62  -5
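In brief, the commit adds vector-aware lowering of Load/Store to the LLVM backend: alignment inference via modular arithmetic (ModularEntry), shuffle-based vector helpers (broadcast, slice, pad, flip, concat), a scalarization fallback, and new Python tests. As a reading aid only (not part of the commit), the dispatch implemented by the new Load visitor can be sketched in Python like this:

    def lower_load_strategy(is_scalar, ramp_stride):
        # Mirrors the branch structure of the new CodeGenLLVM::VisitExpr_(const Load*).
        if is_scalar:
            return "aligned scalar load"
        if ramp_stride == 1:
            return "native-width vector loads + CreateVecConcat"
        if ramp_stride == 2:
            return "two unit-stride loads + deinterleaving shuffle"
        if ramp_stride == -1:
            return "unit-stride load + CreateVecFlip"
        return "Scalarize: per-lane loads + insertelement"

    assert lower_load_strategy(False, -1) == "unit-stride load + CreateVecFlip"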
src/arithmetic/compute_expr.h
@@ -80,7 +80,7 @@ inline bool GetConstInt(Expr e, int* out) {
      }                                                   \
      uint64_t ua = 0, ub = 0;                            \
      if (GetConst(a, &ua) && GetConst(b, &ub)) {         \
-       return ir::UIntImm::make(a.type(), ua + ub);      \
+       return ir::UIntImm::make(a.type(), ua OP ub);     \
      }                                                   \
    template<>
src/arithmetic/modular.cc
@@ -113,7 +113,7 @@ class ModularEvaluator
 private:
  const std::unordered_map<const Variable*, ModularEntry>& mod_map_;
  friend struct ModularEntry;
  // simplify the base by putting it in range.
  static int BaseSimplify(int base, int coeff) {
    if (coeff == 0) return base;
@@ -136,6 +136,15 @@ class ModularEvaluator
  }
};

ModularEntry ModularEntry::Add(const ModularEntry& a,
                               const ModularEntry& b) {
  ModularEntry ret;
  ret.coeff = ModularEvaluator::ZeroAwareGCD(a.coeff, b.coeff);
  ret.base = ModularEvaluator::BaseSimplify(a.base + b.base, ret.coeff);
  return ret;
}
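A ModularEntry records what is known about an integer index: the value is coeff*k + base for some k. Add combines two such facts: the sum of coeff_a*k + base_a and coeff_b*m + base_b is known modulo gcd(coeff_a, coeff_b). A minimal Python mirror of this rule, for illustration only (not the commit's code):

    from math import gcd

    def modular_add(a, b):
        # a, b are (coeff, base) pairs: the value is coeff*k + base for some k.
        coeff = gcd(a[0], b[0])          # ZeroAwareGCD: gcd(0, x) == x
        base = a[1] + b[1]
        if coeff != 0:
            base %= coeff                # BaseSimplify: put base back in range
        return (coeff, base)

    # (4k + 1) + (6m + 3) is always even: coeff 2, base 0.
    assert modular_add((4, 1), (6, 3)) == (2, 0)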
ModularEntry EvalModular(
    const Expr& e,
    const std::unordered_map<const Variable*, ModularEntry>& mod_map) {
src/arithmetic/modular.h
@@ -37,6 +37,14 @@ struct ModularEntry {
    e.base = 0;
    e.coeff = 1;
    return e;
  }
  /*!
   * \brief Add two modular entries together to get a new modular entry.
   * \param a The left operand.
   * \param b The right operand.
   * \return The combined modular entry.
   */
  static ModularEntry Add(const ModularEntry& a,
                          const ModularEntry& b);
};
src/codegen/llvm/codegen_llvm.cc
@@ -102,8 +102,14 @@ void CodeGenLLVM::InitGlobalContext() {
  gv_mod_ctx_->setInitializer(llvm::Constant::getNullValue(t_void_p_));
}

void CodeGenLLVM::InitFuncState() {
  var_map_.clear();
  align_map_.clear();
  alloc_storage_scope_.clear();
}

void CodeGenLLVM::AddFunction(const LoweredFunc& f) {
  this->InitFuncState();
  CHECK(!module_->getFunction(f->name))
      << "Function " << f->name << "already exists in module";
  std::vector<llvm::Type*> arg_type;
@@ -163,6 +169,7 @@ class FPassManager : public llvm::legacy::FunctionPassManager {
    llvm::legacy::FunctionPassManager::add(p);
  }
};

class MPassManager : public llvm::legacy::PassManager {
 public:
  // override add to allow messaging
@@ -245,6 +252,7 @@ void CodeGenLLVM::AddAliasInfo(
  int base = 0, width = 0;
  // create meta-data for alias analysis
  // Use a group of binary tree ranges.
  if (index.defined()) {
    const Ramp* ramp = index.as<Ramp>();
    if (ramp) {
      int base, stride;
@@ -263,7 +271,7 @@ void CodeGenLLVM::AddAliasInfo(
    } else {
      if (arith::GetConstInt(index, &base)) width = 1;
    }
  }
  llvm::MDNode* meta = md_tbaa_root_;
  std::ostringstream buffer_addr;
  buffer_addr << buffer;
@@ -283,12 +291,12 @@ void CodeGenLLVM::AddAliasInfo(
}

llvm::Value* CodeGenLLVM::CreateBroadcast(llvm::Value* value, int lanes) {
- llvm::Constant* init = llvm::UndefValue::get(
+ llvm::Constant* undef = llvm::UndefValue::get(
      llvm::VectorType::get(value->getType(), lanes));
  llvm::Constant* zero = ConstInt32(0);
- value = builder_->CreateInsertElement(init, value, zero);
+ value = builder_->CreateInsertElement(undef, value, zero);
  llvm::Constant* mask = llvm::ConstantVector::getSplat(lanes, zero);
- return builder_->CreateShuffleVector(value, init, mask);
+ return builder_->CreateShuffleVector(value, undef, mask);
}
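CreateBroadcast uses the standard LLVM splat idiom: insert the scalar into lane 0 of an undef vector, then shufflevector with an all-zero mask so every result lane copies lane 0. The mask, sketched in Python for illustration only:

    def splat_mask(lanes):
        # llvm::ConstantVector::getSplat(lanes, zero): every lane selects index 0.
        return [0] * lanes

    assert splat_mask(4) == [0, 0, 0, 0]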
llvm::Value* CodeGenLLVM::CreateBufferPtr(
@@ -684,6 +692,38 @@ llvm::Value* CodeGenLLVM::CreateIntrinstic(const Call* op) {
  return nullptr;
}

int CodeGenLLVM::NativeVectorBits(const std::string& storage_scope) const {
  // By default, we ask the buffer to be aligned to 64 bytes
  return 64 * 8;
}

void CodeGenLLVM::GetAlignment(
    Type t, const Variable* buf_var, const Expr& index,
    int* p_alignment, int* p_native_bits) {
  int& alignment = *p_alignment;
  int& native_bits = *p_native_bits;
  // The storage scope.
  std::string scope;
  auto it = alloc_storage_scope_.find(buf_var);
  if (it != alloc_storage_scope_.end()) {
    scope = it->second;
  }
  arith::ModularEntry m = EvalModular(index, align_map_);
  native_bits = NativeVectorBits(scope);
  alignment = t.element_of().bits();
  // find alignment
  while ((m.coeff & 1) == 0 &&
         (m.base & 1) == 0 &&
         alignment < native_bits) {
    m.coeff /= 2;
    m.base /= 2;
    alignment *= 2;
  }
  CHECK_EQ(alignment % 8, 0)
      << "Load from memory that does not align to 8 bits";
  alignment /= 8;
}
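The loop reads the modular fact index = coeff*k + base and doubles the provable alignment while both coeff and base stay even, capped at the native vector width. A Python mirror with a worked case, for illustration only:

    def alignment_bits(coeff, base, elem_bits, native_bits):
        # Double the provable alignment while the index pattern stays even.
        align = elem_bits
        while coeff % 2 == 0 and base % 2 == 0 and align < native_bits:
            coeff //= 2
            base //= 2
            align *= 2
        return align

    # index = 4*k on 32-bit elements: provably 128-bit aligned.
    assert alignment_bits(4, 0, 32, 512) == 128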
// visitor overrides
llvm::Value* CodeGenLLVM::VisitExpr_(const Variable* op) {
  return GetVarValue(op);
@@ -849,7 +889,9 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Select* op) {
llvm::Value* CodeGenLLVM::VisitExpr_(const Let* op) {
  llvm::Value* v = MakeValue(op->value);
  CHECK(!var_map_.count(op->var.get()));
  CHECK(!align_map_.count(op->var.get()));
  var_map_[op->var.get()] = v;
  align_map_[op->var.get()] = arith::EvalModular(op->value, align_map_);
  return MakeValue(op->body);
}
@@ -872,35 +914,206 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Ramp* op) {
  return value;
}

void CodeGenLLVM::Scalarize(const Expr& e,
                            std::function<void(int i, llvm::Value* v)> f) {
  const Ramp* ramp = e.as<Ramp>();
  Type t = e.type();
  if (ramp) {
    for (int i = 0; i < t.lanes(); ++i) {
      Expr offset = arith::ComputeExpr<Add>(
          ramp->base, arith::ComputeExpr<Mul>(ramp->stride, i));
      f(i, MakeValue(offset));
    }
  } else {
    llvm::Value* index = MakeValue(e);
    for (int i = 0; i < t.lanes(); ++i) {
      f(i, builder_->CreateExtractElement(index, ConstInt32(i)));
    }
  }
}
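Scalarize is the general fallback: it visits each lane of a vector index, folding the per-lane offset at compile time when the index is an affine Ramp and extracting lanes at runtime otherwise. The Ramp fast path in Python form, for illustration only:

    def ramp_offsets(base, stride, lanes):
        # Affine ramp: lane i lives at base + stride * i, known at compile time.
        return [base + stride * i for i in range(lanes)]

    assert ramp_offsets(8, 2, 4) == [8, 10, 12, 14]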
llvm::Value* CodeGenLLVM::CreateVecFlip(llvm::Value* vec) {
  int lanes = static_cast<int>(vec->getType()->getVectorNumElements());
  std::vector<llvm::Constant*> indices;
  for (int i = lanes; i != 0; --i) {
    indices.push_back(ConstInt32(i - 1));
  }
  llvm::Constant* undef = llvm::UndefValue::get(vec->getType());
  return builder_->CreateShuffleVector(
      vec, undef, llvm::ConstantVector::get(indices));
}
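The flip is a single shufflevector whose mask lists the lanes in reverse; this is what lets a stride -1 load be lowered as one unit-stride load followed by a reversal. Mask construction in Python, for illustration only:

    def flip_mask(lanes):
        # Reverse shuffle mask: lane i of the result reads lane lanes-1-i.
        return [lanes - 1 - i for i in range(lanes)]

    assert flip_mask(4) == [3, 2, 1, 0]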
llvm::Value* CodeGenLLVM::CreateVecSlice(llvm::Value* vec, int begin, int lanes) {
  int total_lanes = static_cast<int>(vec->getType()->getVectorNumElements());
  CHECK_LE(begin + lanes, total_lanes);
  if (lanes == total_lanes && begin == 0) return vec;
  std::vector<llvm::Constant*> indices;
  for (int i = 0; i < lanes; ++i) {
    indices.push_back(ConstInt32(begin + i));
  }
  llvm::Constant* undef = llvm::UndefValue::get(vec->getType());
  return builder_->CreateShuffleVector(
      vec, undef, llvm::ConstantVector::get(indices));
}

llvm::Value* CodeGenLLVM::CreateVecPad(llvm::Value* vec, int target_lanes) {
  int lanes = static_cast<int>(vec->getType()->getVectorNumElements());
  if (target_lanes == lanes) return vec;
  CHECK_GT(target_lanes, lanes);
  int pad_lanes = target_lanes - lanes;
  llvm::Constant* undef = llvm::UndefValue::get(
      llvm::VectorType::get(vec->getType()->getVectorElementType(), pad_lanes));
  std::vector<llvm::Constant*> indices;
  for (int i = 0; i < target_lanes; ++i) {
    indices.push_back(ConstInt32(i));
  }
  return builder_->CreateShuffleVector(
      vec, undef, llvm::ConstantVector::get(indices));
}
llvm::Value* CodeGenLLVM::CreateVecConcat(std::vector<llvm::Value*> vec) {
  CHECK_NE(vec.size(), 0U);
  int target_lanes = 0;
  for (llvm::Value* v : vec) {
    target_lanes += static_cast<int>(v->getType()->getVectorNumElements());
  }
  // tree shape merging
  while (vec.size() != 1) {
    std::vector<llvm::Value*> merged;
    for (size_t i = 0; i < vec.size() - 1; i += 2) {
      llvm::Value* v1 = vec[i];
      llvm::Value* v2 = vec[i + 1];
      int w1 = static_cast<int>(v1->getType()->getVectorNumElements());
      int w2 = static_cast<int>(v2->getType()->getVectorNumElements());
      int w = std::max(w1, w2);
      v1 = CreateVecPad(v1, w);
      v2 = CreateVecPad(v2, w);
      std::vector<llvm::Constant*> indices;
      for (int i = 0; i < w * 2; ++i) {
        indices.push_back(ConstInt32(i));
      }
      merged.push_back(builder_->CreateShuffleVector(
          v1, v2, llvm::ConstantVector::get(indices)));
    }
    if (vec.size() % 2 == 1) {
      merged.push_back(vec.back());
    }
    vec = merged;
  }
  return CreateVecSlice(vec[0], 0, target_lanes);
}
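CreateVecConcat merges the per-chunk loads pairwise (a log-depth tree) rather than appending one vector at a time; unequal widths are padded up before each merge, and a final slice trims the result back to the true lane count. The width bookkeeping in Python, for illustration only:

    def concat_widths(widths):
        # Tree-shape merging: each round pads a pair to equal width w and
        # shuffles it into a 2*w vector; a final slice trims to the true total.
        total = sum(widths)
        while len(widths) > 1:
            merged = [2 * max(widths[i], widths[i + 1])
                      for i in range(0, len(widths) - 1, 2)]
            if len(widths) % 2 == 1:
                merged.append(widths[-1])
            widths = merged
        return widths[0], total

    # three chunks of 4, 4 and 2 lanes: padded tree gives 16, sliced back to 10.
    assert concat_widths([4, 4, 2]) == (16, 10)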
llvm::Value* CodeGenLLVM::VisitExpr_(const Load* op) {
  Type t = op->type;
- CHECK(!t.is_vector());
  const Ramp* ramp = op->index.as<Ramp>();
  llvm::Value* buf = GetVarValue(op->buffer_var.get());
  if (t.is_scalar()) {
    llvm::LoadInst* inst = builder_->CreateAlignedLoad(
-       CreateBufferPtr(t, GetVarValue(op->buffer_var.get()), MakeValue(op->index)),
        CreateBufferPtr(t, buf, MakeValue(op->index)),
        data_layout_->getTypeAllocSize(LLVMType(t)));
    AddAliasInfo(inst, op->buffer_var.get(), op->index);
    return inst;
  } else if (ramp && is_one(ramp->stride)) {
    int alignment, native_bits;
    GetAlignment(t, op->buffer_var.get(), ramp->base, &alignment, &native_bits);
    int total_lanes = t.lanes();
    int step = native_bits / t.bits();
    std::vector<llvm::Value*> loads;
    for (int offset = 0; offset < total_lanes; offset += step) {
      int lanes = std::min(step, total_lanes - offset);
      Expr base = arith::ComputeExpr<Add>(
          ramp->base, make_const(ramp->base.type(), offset));
      llvm::Value* ptr = CreateBufferPtr(t.element_of(), buf, MakeValue(base));
      llvm::Type* vtype = llvm::VectorType::get(
          LLVMType(t.element_of()), lanes)->getPointerTo();
      llvm::LoadInst* inst = builder_->CreateAlignedLoad(
          builder_->CreatePointerCast(ptr, vtype), alignment);
      AddAliasInfo(inst, op->buffer_var.get(),
                   Ramp::make(base, make_const(base.type(), 1), lanes));
      loads.push_back(inst);
    }
    return CreateVecConcat(loads);
  } else if (ramp && is_const(ramp->stride, 2)) {
    int alignment, native_bits;
    GetAlignment(t, op->buffer_var.get(), ramp->base, &alignment, &native_bits);
    arith::ModularEntry e = arith::EvalModular(ramp->base, align_map_);
    Type bt = ramp->base.type();
    int first_shift, next_shift;
    // If it is even base, and native alignments is bigger than twice
    // of the type, to ensure safe loading.
    if (e.coeff % 2 == 0 &&
        e.base % 2 == 0 &&
        native_bits >= t.bits() * 2) {
      first_shift = 0;
      next_shift = 0;
    } else if (e.coeff % 2 == 0 && e.base % 2 == 1) {
      // odd base, shift both to left.
      first_shift = -1;
      next_shift = -1;
    } else {
      LOG(FATAL) << "not yet supported";
      return nullptr;
      // save option, right part, safe option.
      first_shift = 0;
      next_shift = -1;
    }
    llvm::Value* first = MakeValue(Load::make(
        t, op->buffer_var,
        Ramp::make(arith::ComputeExpr<Add>(ramp->base, make_const(bt, first_shift)),
                   make_const(bt, 1), ramp->lanes)));
    llvm::Value* next = MakeValue(Load::make(
        t, op->buffer_var,
        Ramp::make(arith::ComputeExpr<Add>(
                       ramp->base, make_const(bt, ramp->lanes + next_shift)),
                   make_const(bt, 1), ramp->lanes)));
    // shuffle
    std::vector<llvm::Constant*> indices;
    int target_index = 0;
    for (int i = 0; i < ramp->lanes; ++i) {
      int idx = first_shift + i;
      if (idx == target_index) {
        indices.push_back(ConstInt32(i));
        target_index += 2;
      }
    }
    for (int i = 0; i < ramp->lanes; ++i) {
      int idx = ramp->lanes + next_shift + i;
      if (idx == target_index) {
        indices.push_back(ConstInt32(i + ramp->lanes));
        target_index += 2;
      }
    }
    CHECK_EQ(indices.size(), static_cast<size_t>(ramp->lanes));
    return builder_->CreateShuffleVector(
        first, next, llvm::ConstantVector::get(indices));
  } else if (ramp && is_const(ramp->stride, -1)) {
    int lanes = ramp->type.lanes();
    Expr neg_ramp = Ramp::make(
        arith::ComputeExpr<Sub>(
            ramp->base, make_const(ramp->base.type(), lanes - 1)),
        make_const(ramp->base.type(), 1), lanes);
    // load value then flip
    llvm::Value* v = MakeValue(Load::make(t, op->buffer_var, neg_ramp));
    return CreateVecFlip(v);
  } else {
    llvm::Value* ret = llvm::UndefValue::get(LLVMType(t));
    Scalarize(op->index, [&](int i, llvm::Value* offset) {
        llvm::Value* ptr = CreateBufferPtr(t.element_of(), buf, offset);
        llvm::LoadInst* inst = builder_->CreateAlignedLoad(
            ptr, data_layout_->getTypeAllocSize(LLVMType(t)));
        AddAliasInfo(inst, op->buffer_var.get(), Expr());
        ret = builder_->CreateInsertElement(ret, inst, ConstInt32(i));
      });
    return ret;
  }
}
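For a stride 2 gather the backend never issues a strided load: it takes two overlapping unit-stride loads ("first" and "next"), choosing shifts so each load stays aligned (shift -1 when the modular base is odd), then deinterleaves them with one shuffle. How the mask is assembled, in Python for illustration only:

    def stride2_shuffle(lanes, first_shift, next_shift):
        # Pick every second element out of two overlapping unit-stride loads.
        indices = []
        target = 0
        for i in range(lanes):
            if first_shift + i == target:
                indices.append(i)          # element from the first load
                target += 2
        for i in range(lanes):
            if lanes + next_shift + i == target:
                indices.append(i + lanes)  # element from the second load
                target += 2
        return indices

    # even base: two aligned loads of `lanes` elements cover 2*lanes entries.
    assert stride2_shuffle(4, 0, 0) == [0, 2, 4, 6]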
@@ -908,21 +1121,61 @@ llvm::Value* CodeGenLLVM::VisitExpr_(const Call* op) {
void CodeGenLLVM::VisitStmt_(const Store* op) {
  llvm::Value* value = MakeValue(op->value);
  Type t = op->value.type();
- CHECK(!t.is_vector());
  const Ramp* ramp = op->index.as<Ramp>();
  llvm::Value* buf = GetVarValue(op->buffer_var.get());
  if (t.is_scalar()) {
    llvm::StoreInst* inst = builder_->CreateAlignedStore(
        value,
-       CreateBufferPtr(t, GetVarValue(op->buffer_var.get()), MakeValue(op->index)),
        CreateBufferPtr(t, buf, MakeValue(op->index)),
        data_layout_->getTypeAllocSize(value->getType()));
    AddAliasInfo(inst, op->buffer_var.get(), op->index);
  } else if (ramp && is_one(ramp->stride)) {
    int alignment, native_bits;
    GetAlignment(t, op->buffer_var.get(), ramp->base, &alignment, &native_bits);
    int total_lanes = t.lanes();
    int step = native_bits / t.bits();
    // vector store.
    for (int offset = 0; offset < total_lanes; offset += step) {
      int lanes = std::min(step, total_lanes - offset);
      Expr base = arith::ComputeExpr<Add>(
          ramp->base, make_const(ramp->base.type(), offset));
      llvm::Value* ptr = CreateBufferPtr(t.element_of(), buf, MakeValue(base));
      llvm::Type* vtype = llvm::VectorType::get(
          LLVMType(t.element_of()), lanes)->getPointerTo();
      llvm::StoreInst* inst = builder_->CreateAlignedStore(
          CreateVecSlice(value, offset, lanes),
          builder_->CreatePointerCast(ptr, vtype), alignment);
      AddAliasInfo(inst, op->buffer_var.get(),
                   Ramp::make(base, make_const(base.type(), 1), lanes));
    }
  } else {
-   LOG(FATAL) << "not yet supported";
    Scalarize(op->index, [&](int i, llvm::Value* offset) {
        llvm::Value* ptr = CreateBufferPtr(t.element_of(), buf, offset);
        llvm::StoreInst* inst = builder_->CreateAlignedStore(
            builder_->CreateExtractElement(value, ConstInt32(i)),
            ptr, data_layout_->getTypeAllocSize(LLVMType(t)));
        AddAliasInfo(inst, op->buffer_var.get(), Expr());
      });
  }
}
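Unit-stride vector stores are split the same way as loads: the value is cut with CreateVecSlice into chunks of at most native_bits / t.bits() lanes, so each piece maps to one native vector store. The chunking in Python, for illustration only:

    def store_chunks(total_lanes, native_bits, elem_bits):
        # Slice a wide vector into native-width stores; the tail may be short.
        step = native_bits // elem_bits
        return [(off, min(step, total_lanes - off))
                for off in range(0, total_lanes, step)]

    # 24 x float32 against 512-bit native vectors: a 16-lane and an 8-lane store.
    assert store_chunks(24, 512, 32) == [(0, 16), (16, 8)]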
llvm::Value* CodeGenLLVM::VisitExpr_(const Call* op) {
  if (op->is_intrinsic(intrinsic::tvm_call_packed)) {
    return CreateCallPacked(op);
  } else if (op->call_type == Call::Intrinsic ||
             op->call_type == Call::PureIntrinsic) {
    return CreateIntrinstic(op);
  } else {
    CHECK(op->call_type == Call::Extern ||
          op->call_type == Call::PureExtern);
    return CreateCallExtern(op);
  }
}

void CodeGenLLVM::VisitStmt_(const For* op) {
  CHECK(is_zero(op->min));
  if (op->for_type == ForType::Serial) {
@@ -986,6 +1239,11 @@ void CodeGenLLVM::VisitStmt_(const Allocate* op) {
}

void CodeGenLLVM::VisitStmt_(const AttrStmt* op) {
  if (op->type_key == ir::attr::storage_scope) {
    const Variable* v = op->node.as<Variable>();
    CHECK(v);
    alloc_storage_scope_[v] = op->value.as<StringImm>()->value;
  }
  this->VisitStmt(op->body);
}
@@ -1014,7 +1272,9 @@ void CodeGenLLVM::VisitStmt_(const AssertStmt* op) {
void CodeGenLLVM::VisitStmt_(const LetStmt* op) {
  llvm::Value* v = MakeValue(op->value);
  CHECK(!var_map_.count(op->var.get()));
  CHECK(!align_map_.count(op->var.get()));
  var_map_[op->var.get()] = v;
  align_map_[op->var.get()] = arith::EvalModular(op->value, align_map_);
  this->VisitStmt(op->body);
}

void CodeGenLLVM::VisitStmt_(const Block* op) {
src/codegen/llvm/codegen_llvm.h
@@ -14,6 +14,7 @@
#include <vector>
#include <string>
#include "./llvm_common.h"
#include "../../arithmetic/modular.h"

namespace tvm {
namespace codegen {
@@ -109,18 +110,29 @@ class CodeGenLLVM :
  virtual llvm::Value* CreateCallExtern(const Call* op);
  // create call into tvm packed function.
  virtual llvm::Value* CreateCallPacked(const Call* op);
  // Scalarize e by iterating elements of e.
  // f is a callback that takes index and v.
  virtual void Scalarize(const Expr& e,
                         std::function<void(int i, llvm::Value* v)> f);

 protected:
  /*!
   * \param t The original type.
   * \return LLVM type of t
   */
  llvm::Type* LLVMType(const Type& t) const;
  // initialize the function state.
  void InitFuncState();
  // Get alignment given index.
  void GetAlignment(
      Type t, const Variable* buf_var, const Expr& index,
      int* p_alignment, int* p_native_bits);
  // do a scalarize call with f
  llvm::Value* CreateScalarizedCall(
      const Call* op, llvm::Function* f,
      const std::vector<llvm::Value*>& args);
  // apply optimization on the module.
  virtual void Optimize();
  // Get the maximim storage align bits of buffer pointer given storage scope.
  virtual int NativeVectorBits(const std::string& storage_scope) const;
  // The IRBuilder.
  using IRBuilder = llvm::IRBuilder<
      llvm::ConstantFolder, llvm::IRBuilderDefaultInserter>;
  // The current function
@@ -162,6 +174,8 @@ class CodeGenLLVM :
  llvm::Function* f_tvm_parallel_for_{nullptr};
  // The acting body
  llvm::BasicBlock* block_{nullptr};
  /*! \brief the storage scope of allocation */
  std::unordered_map<const Variable*, std::string> alloc_storage_scope_;

 private:
  // comparison op
@@ -178,6 +192,11 @@ class CodeGenLLVM :
  llvm::Value* CreateBufferPtr(Type t, llvm::Value* buffer, llvm::Value* index);
  llvm::Value* CreateCast(Type from, Type to, llvm::Value* value);
  llvm::Value* GetPackedFuncHandle(const std::string& str);
  // Vector concatenation.
  llvm::Value* CreateVecSlice(llvm::Value* vec, int begin, int extent);
  llvm::Value* CreateVecFlip(llvm::Value* vec);
  llvm::Value* CreateVecConcat(std::vector<llvm::Value*> vecs);
  llvm::Value* CreateVecPad(llvm::Value* vec, int target_lanes);
  // Create parallel for.
  void CreateParallelFor(const For* op);
  // Create serial for
@@ -197,6 +216,8 @@ class CodeGenLLVM :
  std::unordered_map<const Variable*, llvm::Value*> var_map_;
  // global strings
  std::unordered_map<std::string, llvm::Constant*> str_map_;
  // The alignment information
  std::unordered_map<const Variable*, arith::ModularEntry> align_map_;
  // The local module_context
  llvm::GlobalVariable* gv_mod_ctx_{nullptr};
  // global to packed function handle
src/pass/vectorize_loop.cc
@@ -355,7 +355,9 @@ class Vectorizer : public IRMutator {
    const Ramp* a_ramp = a.as<Ramp>();
    if (a.type().lanes() == 1 && b_ramp) {
      return Ramp::make(
-         arith::ComputeExpr<T>(a, b_ramp->base), b_ramp->stride, b_ramp->lanes);
+         arith::ComputeExpr<T>(a, b_ramp->base),
+         arith::ComputeExpr<T>(make_zero(b_ramp->stride.type()), b_ramp->stride),
+         b_ramp->lanes);
    }
    if (b.type().lanes() == 1 && a_ramp) {
      return Ramp::make(
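The stride of (scalar OP Ramp) can no longer be passed through unchanged: it must itself go through the operator, computed as OP(0, stride). For Add this is a no-op, but for Sub it negates the stride, producing the stride -1 ramps that the new CreateVecFlip path lowers. The rule in Python, for illustration only:

    import operator

    def scalar_op_ramp_stride(op, stride):
        # New stride for (scalar OP Ramp(base, stride)): apply OP to (0, stride).
        return op(0, stride)

    assert scalar_op_ramp_stride(operator.add, 1) == 1    # stride unchanged
    assert scalar_op_ramp_stride(operator.sub, 1) == -1   # ramp runs backwards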
tests/python/unittest/test_codegen_llvm.py
@@ -2,13 +2,15 @@ import tvm
import numpy as np

def test_llvm_add_pipeline():
-   n = tvm.Var('n')
+   nn = 1024
+   n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
    s = tvm.Schedule(C.op)
-   s[C].parallel(C.op.axis[0])
+   xo, xi = s[C].split(C.op.axis[0], factor=4)
+   s[C].parallel(xo)
+   s[C].vectorize(xi)
    def check_llvm():
        if not tvm.codegen.enabled("llvm"):
            return
@@ -16,16 +18,71 @@ def test_llvm_add_pipeline():
        f = tvm.build(s, [A, B, C], "llvm")
        ctx = tvm.cpu(0)
        # launch the kernel.
-       n = 1027 * 1024
+       n = nn
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        for i in range(1000):
            f(a, b, c)
        np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
    check_llvm()

def test_llvm_flip_pipeline():
    def check_llvm(nn, base):
        if not tvm.codegen.enabled("llvm"):
            return
        n = tvm.convert(nn)
        A = tvm.placeholder((n + base), name='A')
        C = tvm.compute((n,), lambda i: A(nn + base - i - 1), name='C')
        s = tvm.Schedule(C.op)
        xo, xi = s[C].split(C.op.axis[0], factor=4)
        s[C].parallel(xo)
        s[C].vectorize(xi)
        # build and invoke the kernel.
        f = tvm.build(s, [A, C], "llvm")
        ctx = tvm.cpu(0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(np.random.uniform(size=(n + base)).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
        np.testing.assert_allclose(
            c.asnumpy(), a.asnumpy()[::-1][:n])
    check_llvm(4, 0)
    check_llvm(128, 8)
    check_llvm(3, 0)
    check_llvm(128, 1)

def test_llvm_madd_pipeline():
    def check_llvm(nn, base, stride):
        if not tvm.codegen.enabled("llvm"):
            return
        n = tvm.convert(nn)
        A = tvm.placeholder((n + base, stride), name='A')
        C = tvm.compute((n, stride), lambda i, j: A(base + i, j) + 1, name='C')
        s = tvm.Schedule(C.op)
        xo, xi = s[C].split(C.op.axis[0], factor=4)
        s[C].parallel(xo)
        s[C].vectorize(xi)
        # build and invoke the kernel.
        f = tvm.build(s, [A, C], "llvm")
        ctx = tvm.cpu(0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(
            np.random.uniform(size=(n + base, stride)).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, stride), dtype=C.dtype), ctx)
        f(a, c)
        np.testing.assert_allclose(c.asnumpy(), a.asnumpy()[base:] + 1)
    check_llvm(64, 0, 2)
    check_llvm(4, 0, 1)
    check_llvm(4, 0, 3)

if __name__ == "__main__":
    test_llvm_add_pipeline()
    test_llvm_flip_pipeline()
    test_llvm_madd_pipeline()
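test_llvm_flip_pipeline exercises the reversed-access path: C[i] = A[nn + base - i - 1] vectorizes to a stride -1 ramp. Its expected result, restated in plain numpy for illustration only:

    import numpy as np

    n, base = 128, 8
    a = np.random.uniform(size=n + base).astype("float32")
    # C[i] = A[n + base - i - 1] is the reversed array, trimmed to n entries.
    assert np.allclose(a[::-1][:n], a[n + base - np.arange(n) - 1])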