Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
2c512ca7
Commit
2c512ca7
authored
Mar 02, 2017
by
Tianqi Chen
Committed by
GitHub
Mar 02, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[LLVM] Vectorized load/store (#60)
parent
2111bbf3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
107 additions
and
10 deletions
+107
-10
src/arithmetic/compute_expr.h
+1
-1
src/arithmetic/modular.cc
+10
-1
src/arithmetic/modular.h
+8
-0
src/codegen/llvm/codegen_llvm.cc
+0
-0
src/codegen/llvm/codegen_llvm.h
+22
-1
src/pass/vectorize_loop.cc
+3
-1
tests/python/unittest/test_codegen_llvm.py
+63
-6
No files found.
src/arithmetic/compute_expr.h
View file @
2c512ca7
...
...
@@ -80,7 +80,7 @@ inline bool GetConstInt(Expr e, int* out) {
} \
uint64_t ua = 0, ub = 0; \
if (GetConst(a, &ua) && GetConst(b, &ub)) { \
return ir::UIntImm::make(a.type(), ua
+ ub);
\
return ir::UIntImm::make(a.type(), ua
OP ub);
\
} \
template
<>
...
...
src/arithmetic/modular.cc
View file @
2c512ca7
...
...
@@ -113,7 +113,7 @@ class ModularEvaluator
private
:
const
std
::
unordered_map
<
const
Variable
*
,
ModularEntry
>&
mod_map_
;
friend
struct
ModularEntry
;
// simplify the base by putting it in range.
static
int
BaseSimplify
(
int
base
,
int
coeff
)
{
if
(
coeff
==
0
)
return
base
;
...
...
@@ -136,6 +136,15 @@ class ModularEvaluator
}
};
ModularEntry
ModularEntry
::
Add
(
const
ModularEntry
&
a
,
const
ModularEntry
&
b
)
{
ModularEntry
ret
;
ret
.
coeff
=
ModularEvaluator
::
ZeroAwareGCD
(
a
.
coeff
,
b
.
coeff
);
ret
.
base
=
ModularEvaluator
::
BaseSimplify
(
a
.
base
+
b
.
base
,
ret
.
coeff
);
return
ret
;
}
ModularEntry
EvalModular
(
const
Expr
&
e
,
const
std
::
unordered_map
<
const
Variable
*
,
ModularEntry
>&
mod_map
)
{
...
...
src/arithmetic/modular.h
View file @
2c512ca7
...
...
@@ -37,6 +37,14 @@ struct ModularEntry {
e
.
base
=
0
;
e
.
coeff
=
1
;
return
e
;
}
/*!
* \brief Add two modular entries together to get a new modular entry.
* \param a The left operand.
* \param b The right operand.
* \return The combined modular entry.
*/
static
ModularEntry
Add
(
const
ModularEntry
&
a
,
const
ModularEntry
&
b
);
};
/*!
...
...
src/codegen/llvm/codegen_llvm.cc
View file @
2c512ca7
This diff is collapsed.
Click to expand it.
src/codegen/llvm/codegen_llvm.h
View file @
2c512ca7
...
...
@@ -14,6 +14,7 @@
#include <vector>
#include <string>
#include "./llvm_common.h"
#include "../../arithmetic/modular.h"
namespace
tvm
{
namespace
codegen
{
...
...
@@ -109,18 +110,29 @@ class CodeGenLLVM :
virtual
llvm
::
Value
*
CreateCallExtern
(
const
Call
*
op
);
// create call into tvm packed function.
virtual
llvm
::
Value
*
CreateCallPacked
(
const
Call
*
op
);
// Scalarize e by iterating elements of e.
// f is a callback that takes index and v.
virtual
void
Scalarize
(
const
Expr
&
e
,
std
::
function
<
void
(
int
i
,
llvm
::
Value
*
v
)
>
f
);
protected
:
/*!
* \param t The original type.
* \return LLVM type of t
*/
llvm
::
Type
*
LLVMType
(
const
Type
&
t
)
const
;
// initialize the function state.
void
InitFuncState
();
// Get alignment given index.
void
GetAlignment
(
Type
t
,
const
Variable
*
buf_var
,
const
Expr
&
index
,
int
*
p_alignment
,
int
*
p_native_bits
);
// do a scalarize call with f
llvm
::
Value
*
CreateScalarizedCall
(
const
Call
*
op
,
llvm
::
Function
*
f
,
const
std
::
vector
<
llvm
::
Value
*>&
args
);
// apply optimization on the module.
virtual
void
Optimize
();
// Get the maximim storage align bits of buffer pointer given storage scope.
virtual
int
NativeVectorBits
(
const
std
::
string
&
storage_scope
)
const
;
// The IRBuilder.
using
IRBuilder
=
llvm
::
IRBuilder
<
llvm
::
ConstantFolder
,
llvm
::
IRBuilderDefaultInserter
>
;
// The current function
...
...
@@ -162,6 +174,8 @@ class CodeGenLLVM :
llvm
::
Function
*
f_tvm_parallel_for_
{
nullptr
};
// The acting body
llvm
::
BasicBlock
*
block_
{
nullptr
};
/*! \brief the storage scope of allocation */
std
::
unordered_map
<
const
Variable
*
,
std
::
string
>
alloc_storage_scope_
;
private
:
// comparison op
...
...
@@ -178,6 +192,11 @@ class CodeGenLLVM :
llvm
::
Value
*
CreateBufferPtr
(
Type
t
,
llvm
::
Value
*
buffer
,
llvm
::
Value
*
index
);
llvm
::
Value
*
CreateCast
(
Type
from
,
Type
to
,
llvm
::
Value
*
value
);
llvm
::
Value
*
GetPackedFuncHandle
(
const
std
::
string
&
str
);
// Vector concatenation.
llvm
::
Value
*
CreateVecSlice
(
llvm
::
Value
*
vec
,
int
begin
,
int
extent
);
llvm
::
Value
*
CreateVecFlip
(
llvm
::
Value
*
vec
);
llvm
::
Value
*
CreateVecConcat
(
std
::
vector
<
llvm
::
Value
*>
vecs
);
llvm
::
Value
*
CreateVecPad
(
llvm
::
Value
*
vec
,
int
target_lanes
);
// Create parallel for.
void
CreateParallelFor
(
const
For
*
op
);
// Create serial for
...
...
@@ -197,6 +216,8 @@ class CodeGenLLVM :
std
::
unordered_map
<
const
Variable
*
,
llvm
::
Value
*>
var_map_
;
// global strings
std
::
unordered_map
<
std
::
string
,
llvm
::
Constant
*>
str_map_
;
// The alignment information
std
::
unordered_map
<
const
Variable
*
,
arith
::
ModularEntry
>
align_map_
;
// The local module_context
llvm
::
GlobalVariable
*
gv_mod_ctx_
{
nullptr
};
// global to packed function handle
...
...
src/pass/vectorize_loop.cc
View file @
2c512ca7
...
...
@@ -355,7 +355,9 @@ class Vectorizer : public IRMutator {
const
Ramp
*
a_ramp
=
a
.
as
<
Ramp
>
();
if
(
a
.
type
().
lanes
()
==
1
&&
b_ramp
)
{
return
Ramp
::
make
(
arith
::
ComputeExpr
<
T
>
(
a
,
b_ramp
->
base
),
b_ramp
->
stride
,
b_ramp
->
lanes
);
arith
::
ComputeExpr
<
T
>
(
a
,
b_ramp
->
base
),
arith
::
ComputeExpr
<
T
>
(
make_zero
(
b_ramp
->
stride
.
type
()),
b_ramp
->
stride
),
b_ramp
->
lanes
);
}
if
(
b
.
type
().
lanes
()
==
1
&&
a_ramp
)
{
return
Ramp
::
make
(
...
...
tests/python/unittest/test_codegen_llvm.py
View file @
2c512ca7
...
...
@@ -2,13 +2,15 @@ import tvm
import
numpy
as
np
def
test_llvm_add_pipeline
():
n
=
tvm
.
Var
(
'n'
)
nn
=
1024
n
=
tvm
.
convert
(
nn
)
A
=
tvm
.
placeholder
((
n
,),
name
=
'A'
)
B
=
tvm
.
placeholder
((
n
,),
name
=
'B'
)
C
=
tvm
.
compute
(
A
.
shape
,
lambda
*
i
:
A
(
*
i
)
+
B
(
*
i
),
name
=
'C'
)
s
=
tvm
.
Schedule
(
C
.
op
)
s
[
C
]
.
parallel
(
C
.
op
.
axis
[
0
])
xo
,
xi
=
s
[
C
]
.
split
(
C
.
op
.
axis
[
0
],
factor
=
4
)
s
[
C
]
.
parallel
(
xo
)
s
[
C
]
.
vectorize
(
xi
)
def
check_llvm
():
if
not
tvm
.
codegen
.
enabled
(
"llvm"
):
return
...
...
@@ -16,16 +18,71 @@ def test_llvm_add_pipeline():
f
=
tvm
.
build
(
s
,
[
A
,
B
,
C
],
"llvm"
)
ctx
=
tvm
.
cpu
(
0
)
# launch the kernel.
n
=
1027
*
1024
n
=
nn
a
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
n
)
.
astype
(
A
.
dtype
),
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
n
)
.
astype
(
B
.
dtype
),
ctx
)
c
=
tvm
.
nd
.
array
(
np
.
zeros
(
n
,
dtype
=
C
.
dtype
),
ctx
)
for
i
in
range
(
1000
):
f
(
a
,
b
,
c
)
f
(
a
,
b
,
c
)
np
.
testing
.
assert_allclose
(
c
.
asnumpy
(),
a
.
asnumpy
()
+
b
.
asnumpy
())
check_llvm
()
def
test_llvm_flip_pipeline
():
def
check_llvm
(
nn
,
base
):
if
not
tvm
.
codegen
.
enabled
(
"llvm"
):
return
n
=
tvm
.
convert
(
nn
)
A
=
tvm
.
placeholder
((
n
+
base
),
name
=
'A'
)
C
=
tvm
.
compute
((
n
,),
lambda
i
:
A
(
nn
+
base
-
i
-
1
),
name
=
'C'
)
s
=
tvm
.
Schedule
(
C
.
op
)
xo
,
xi
=
s
[
C
]
.
split
(
C
.
op
.
axis
[
0
],
factor
=
4
)
s
[
C
]
.
parallel
(
xo
)
s
[
C
]
.
vectorize
(
xi
)
# build and invoke the kernel.
f
=
tvm
.
build
(
s
,
[
A
,
C
],
"llvm"
)
ctx
=
tvm
.
cpu
(
0
)
# launch the kernel.
n
=
nn
a
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
(
n
+
base
))
.
astype
(
A
.
dtype
),
ctx
)
c
=
tvm
.
nd
.
array
(
np
.
zeros
(
n
,
dtype
=
C
.
dtype
),
ctx
)
f
(
a
,
c
)
np
.
testing
.
assert_allclose
(
c
.
asnumpy
(),
a
.
asnumpy
()[::
-
1
][:
n
])
check_llvm
(
4
,
0
)
check_llvm
(
128
,
8
)
check_llvm
(
3
,
0
)
check_llvm
(
128
,
1
)
def
test_llvm_madd_pipeline
():
def
check_llvm
(
nn
,
base
,
stride
):
if
not
tvm
.
codegen
.
enabled
(
"llvm"
):
return
n
=
tvm
.
convert
(
nn
)
A
=
tvm
.
placeholder
((
n
+
base
,
stride
),
name
=
'A'
)
C
=
tvm
.
compute
((
n
,
stride
),
lambda
i
,
j
:
A
(
base
+
i
,
j
)
+
1
,
name
=
'C'
)
s
=
tvm
.
Schedule
(
C
.
op
)
xo
,
xi
=
s
[
C
]
.
split
(
C
.
op
.
axis
[
0
],
factor
=
4
)
s
[
C
]
.
parallel
(
xo
)
s
[
C
]
.
vectorize
(
xi
)
# build and invoke the kernel.
f
=
tvm
.
build
(
s
,
[
A
,
C
],
"llvm"
)
ctx
=
tvm
.
cpu
(
0
)
# launch the kernel.
n
=
nn
a
=
tvm
.
nd
.
array
(
np
.
random
.
uniform
(
size
=
(
n
+
base
,
stride
))
.
astype
(
A
.
dtype
),
ctx
)
c
=
tvm
.
nd
.
array
(
np
.
zeros
((
n
,
stride
),
dtype
=
C
.
dtype
),
ctx
)
f
(
a
,
c
)
np
.
testing
.
assert_allclose
(
c
.
asnumpy
(),
a
.
asnumpy
()[
base
:]
+
1
)
check_llvm
(
64
,
0
,
2
)
check_llvm
(
4
,
0
,
1
)
check_llvm
(
4
,
0
,
3
)
if
__name__
==
"__main__"
:
test_llvm_add_pipeline
()
test_llvm_flip_pipeline
()
test_llvm_madd_pipeline
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment