Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
riscv-gcc-1
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
riscv-gcc-1
Commits
d7943c8b
Commit
d7943c8b
authored
Oct 13, 2011
by
Richard Henderson
Committed by
Richard Henderson
Oct 13, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Expand vector permutation with vec_perm and vec_perm_const.
From-SVN: r179958
parent
5066ab2e
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
192 additions
and
56 deletions
+192
-56
gcc/ChangeLog
+11
-0
gcc/doc/md.texi
+6
-0
gcc/genopinit.c
+1
-0
gcc/optabs.c
+166
-50
gcc/optabs.h
+7
-5
gcc/tree-vect-generic.c
+1
-1
No files found.
gcc/ChangeLog
View file @
d7943c8b
2011-10-13 Richard Henderson <rth@redhat.com>
* doc/md.texi (vec_perm): Document fallback to byte permutation.
* genopinit.c (optabs): Add vec_perm_const.
* optabs.c (can_vec_perm_expr_p): Rename from expand_vec_perm_expr_p.
Reject non-vector modes. Allow fallback to byte permutation.
(expand_vec_perm_expr_1): Split out from ...
(expand_vec_perm_expr): ... here. Allow fallback to byte permutation.
* optabs.h (DOI_vec_perm_const, vec_perm_const_optab): New.
* tree-vect-generic.c (lower_vec_perm): Update for name change.
2011-10-13 Richard Henderson <rth@redhat.com>
* config/rs6000/altivec.md (vec_permv16qi): New pattern.
* config/rs6000/spu.md (vec_permv16qi): New pattern.
gcc/doc/md.texi
View file @
d7943c8b
...
...
@@ -4041,6 +4041,12 @@ be computed modulo @math{2*@var{N}}. Note that if
@code
{
rtx_equal_p
(
operand1
,
operand2
)},
this
can
be
implemented
with
just
operand
1
and
selector
elements
modulo
@var
{
N
}.
In
order
to
make
things
easy
for
a
number
of
targets
,
if
there
is
no
@samp
{
vec_perm
}
pattern
for
mode
@var
{
m
},
but
there
is
for
mode
@var
{
q
}
where
@var
{
q
}
is
a
vector
of
@code
{
QImode
}
of
the
same
width
as
@var
{
m
},
the
middle
-
end
will
lower
the
mode
@var
{
m
}
@code
{
VEC_PERM_EXPR
}
to
mode
@var
{
q
}.
@cindex
@code
{
vec_perm_const
@var
{
m
}}
instruction
pattern
@item
@samp
{
vec_perm_const
@var
{
m
}}
Like
@samp
{
vec_perm
}
except
that
the
permutation
is
a
compile
-
time
...
...
gcc/genopinit.c
View file @
d7943c8b
...
...
@@ -254,6 +254,7 @@ static const char * const optabs[] =
"set_optab_handler (vec_shr_optab, $A, CODE_FOR_$(vec_shr_$a$))"
,
"set_optab_handler (vec_realign_load_optab, $A, CODE_FOR_$(vec_realign_load_$a$))"
,
"set_direct_optab_handler (vec_perm_optab, $A, CODE_FOR_$(vec_perm$a$))"
,
"set_direct_optab_handler (vec_perm_const_optab, $A, CODE_FOR_$(vec_perm_const$a$))"
,
"set_convert_optab_handler (vcond_optab, $A, $B, CODE_FOR_$(vcond$a$b$))"
,
"set_convert_optab_handler (vcondu_optab, $A, $B, CODE_FOR_$(vcondu$a$b$))"
,
"set_optab_handler (ssum_widen_optab, $A, CODE_FOR_$(widen_ssum$I$a3$))"
,
...
...
gcc/optabs.c
View file @
d7943c8b
...
...
@@ -6687,87 +6687,203 @@ vector_compare_rtx (tree cond, bool unsignedp, enum insn_code icode)
/* Return true if VEC_PERM_EXPR can be expanded using SIMD extensions
of the CPU. */
bool
expand_vec_perm_expr_p
(
enum
machine_mode
mode
,
tree
v0
,
tree
v1
,
tree
mask
)
can_vec_perm_expr_p
(
tree
type
,
tree
sel
)
{
int
v0_mode_s
=
GET_MODE_BITSIZE
(
TYPE_MODE
(
TREE_TYPE
(
TREE_TYPE
(
v0
))));
int
mask_mode_s
=
GET_MODE_BITSIZE
(
TYPE_MODE
(
TREE_TYPE
(
TREE_TYPE
(
mask
))));
enum
machine_mode
mode
,
qimode
;
mode
=
TYPE_MODE
(
type
);
/* If the target doesn't implement a vector mode for the vector type,
then no operations are supported. */
if
(
!
VECTOR_MODE_P
(
mode
))
return
false
;
if
(
TREE_CODE
(
sel
)
==
VECTOR_CST
)
{
if
(
direct_optab_handler
(
vec_perm_const_optab
,
mode
)
!=
CODE_FOR_nothing
&&
targetm
.
vectorize
.
builtin_vec_perm_ok
(
type
,
sel
))
return
true
;
}
if
(
TREE_CODE
(
mask
)
==
VECTOR_CST
&&
targetm
.
vectorize
.
builtin_vec_perm_ok
(
TREE_TYPE
(
v0
),
mask
))
if
(
direct_optab_handler
(
vec_perm_optab
,
mode
)
!=
CODE_FOR_nothing
)
return
true
;
if
(
v0_mode_s
!=
mask_mode_s
||
TYPE_VECTOR_SUBPARTS
(
TREE_TYPE
(
v0
))
!=
TYPE_VECTOR_SUBPARTS
(
TREE_TYPE
(
mask
))
||
TYPE_VECTOR_SUBPARTS
(
TREE_TYPE
(
v1
))
!=
TYPE_VECTOR_SUBPARTS
(
TREE_TYPE
(
mask
)))
/* We allow fallback to a QI vector mode, and adjust the mask. */
qimode
=
mode_for_vector
(
QImode
,
GET_MODE_SIZE
(
mode
));
if
(
!
VECTOR_MODE_P
(
qimode
))
return
false
;
return
direct_optab_handler
(
vec_perm_optab
,
mode
)
!=
CODE_FOR_nothing
;
/* ??? For completeness, we ought to check the QImode version of
vec_perm_const_optab. But all users of this implicit lowering
feature implement the variable vec_perm_optab. */
if
(
direct_optab_handler
(
vec_perm_optab
,
qimode
)
==
CODE_FOR_nothing
)
return
false
;
/* In order to support the lowering of non-constant permutations,
we need to support shifts and adds. */
if
(
TREE_CODE
(
sel
)
!=
VECTOR_CST
)
{
if
(
GET_MODE_UNIT_SIZE
(
mode
)
>
2
&&
optab_handler
(
ashl_optab
,
mode
)
==
CODE_FOR_nothing
&&
optab_handler
(
vashl_optab
,
mode
)
==
CODE_FOR_nothing
)
return
false
;
if
(
optab_handler
(
add_optab
,
qimode
)
==
CODE_FOR_nothing
)
return
false
;
}
return
true
;
}
/* Generate instructions for VEC_COND_EXPR given its type and three
operands. */
rtx
expand_vec_perm_expr
(
tree
type
,
tree
v0
,
tree
v1
,
tree
mask
,
rtx
target
)
/* A subroutine of expand_vec_perm_expr for expanding one vec_perm insn. */
static
rtx
expand_vec_perm_expr_1
(
enum
insn_code
icode
,
rtx
target
,
rtx
v0
,
rtx
v1
,
rtx
sel
)
{
enum
machine_mode
tmode
=
GET_MODE
(
target
);
enum
machine_mode
smode
=
GET_MODE
(
sel
);
struct
expand_operand
ops
[
4
];
enum
insn_code
icode
;
enum
machine_mode
mode
=
TYPE_MODE
(
type
);
gcc_checking_assert
(
expand_vec_perm_expr_p
(
mode
,
v0
,
v1
,
mask
));
create_output_operand
(
&
ops
[
0
],
target
,
tmode
);
create_input_operand
(
&
ops
[
3
],
sel
,
smode
);
if
(
TREE_CODE
(
mask
)
==
VECTOR_CST
)
/* Make an effort to preserve v0 == v1. The target expander is able to
rely on this to determine if we're permuting a single input operand. */
if
(
rtx_equal_p
(
v0
,
v1
))
{
tree
m_type
,
call
;
tree
fn
=
targetm
.
vectorize
.
builtin_vec_perm
(
TREE_TYPE
(
v0
),
&
m_type
);
if
(
!
insn_operand_matches
(
icode
,
1
,
v0
))
v0
=
force_reg
(
tmode
,
v0
);
gcc_checking_assert
(
insn_operand_matches
(
icode
,
1
,
v0
));
gcc_checking_assert
(
insn_operand_matches
(
icode
,
2
,
v0
));
if
(
!
fn
)
goto
vec_perm
;
create_fixed_operand
(
&
ops
[
1
],
v0
);
create_fixed_operand
(
&
ops
[
2
],
v0
);
}
else
{
create_input_operand
(
&
ops
[
1
],
v0
,
tmode
);
create_input_operand
(
&
ops
[
2
],
v1
,
tmode
);
}
if
(
m_type
!=
TREE_TYPE
(
TREE_TYPE
(
mask
)))
{
int
units
=
TYPE_VECTOR_SUBPARTS
(
TREE_TYPE
(
mask
));
tree
cvt
=
build_vector_type
(
m_type
,
units
);
mask
=
fold_convert
(
cvt
,
mask
);
}
if
(
maybe_expand_insn
(
icode
,
4
,
ops
))
return
ops
[
0
].
value
;
return
NULL_RTX
;
}
call
=
fold_build1
(
ADDR_EXPR
,
build_pointer_type
(
TREE_TYPE
(
fn
)),
fn
);
call
=
build_call_nary
(
type
,
call
,
3
,
v0
,
v1
,
mask
);
/* Generate instructions for VEC_PERM_EXPR given its type and three
operands. */
rtx
expand_vec_perm_expr
(
tree
type
,
tree
v0
,
tree
v1
,
tree
sel
,
rtx
target
)
{
enum
insn_code
icode
;
enum
machine_mode
mode
=
TYPE_MODE
(
type
);
enum
machine_mode
qimode
;
rtx
v0_rtx
,
v1_rtx
,
sel_rtx
,
*
vec
,
vt
,
tmp
;
unsigned
int
i
,
w
,
e
,
u
;
return
expand_expr_real_1
(
call
,
target
,
VOIDmode
,
EXPAND_NORMAL
,
NULL
);
if
(
!
target
)
target
=
gen_reg_rtx
(
mode
);
v0_rtx
=
expand_normal
(
v0
);
if
(
operand_equal_p
(
v0
,
v1
,
0
))
v1_rtx
=
v0_rtx
;
else
v1_rtx
=
expand_normal
(
v1
);
sel_rtx
=
expand_normal
(
sel
);
/* If the input is a constant, expand it specially. */
if
(
CONSTANT_P
(
sel_rtx
))
{
icode
=
direct_optab_handler
(
vec_perm_const_optab
,
mode
);
if
(
icode
!=
CODE_FOR_nothing
&&
targetm
.
vectorize
.
builtin_vec_perm_ok
(
TREE_TYPE
(
v0
),
sel
)
&&
(
tmp
=
expand_vec_perm_expr_1
(
icode
,
target
,
v0_rtx
,
v1_rtx
,
sel_rtx
))
!=
NULL
)
return
tmp
;
}
vec_perm
:
/* Otherwise fall back to a fully variable permutation.  */
icode
=
direct_optab_handler
(
vec_perm_optab
,
mode
);
if
(
icode
!=
CODE_FOR_nothing
&&
(
tmp
=
expand_vec_perm_expr_1
(
icode
,
target
,
v0_rtx
,
v1_rtx
,
sel_rtx
))
!=
NULL
)
return
tmp
;
/* As a special case to aid several targets, lower the element-based
permutation to a byte-based permutation and try again. */
qimode
=
mode_for_vector
(
QImode
,
GET_MODE_SIZE
(
mode
));
if
(
!
VECTOR_MODE_P
(
qimode
))
return
NULL_RTX
;
/* ??? For completeness, we ought to check the QImode version of
vec_perm_const_optab. But all users of this implicit lowering
feature implement the variable vec_perm_optab. */
icode
=
direct_optab_handler
(
vec_perm_optab
,
qimode
);
if
(
icode
==
CODE_FOR_nothing
)
return
0
;
return
NULL_RTX
;
create_output_operand
(
&
ops
[
0
],
target
,
mode
);
create_input_operand
(
&
ops
[
3
],
expand_normal
(
mask
),
TYPE_MODE
(
TREE_TYPE
(
mask
)));
w
=
GET_MODE_SIZE
(
mode
);
e
=
GET_MODE_NUNITS
(
mode
);
u
=
GET_MODE_UNIT_SIZE
(
mode
);
vec
=
XALLOCAVEC
(
rtx
,
w
);
if
(
operand_equal_p
(
v0
,
v1
,
0
))
if
(
CONSTANT_P
(
sel_rtx
))
{
rtx
rtx_v0
=
expand_normal
(
v0
);
if
(
!
insn_operand_matches
(
icode
,
1
,
rtx_v0
))
rtx_v0
=
force_reg
(
mode
,
rtx_v0
);
gcc_checking_assert
(
insn_operand_matches
(
icode
,
2
,
rtx_v0
));
unsigned
int
j
;
for
(
i
=
0
;
i
<
e
;
++
i
)
{
unsigned
int
this_e
=
INTVAL
(
XVECEXP
(
sel_rtx
,
0
,
i
));
this_e
&=
2
*
e
-
1
;
this_e
*=
u
;
create_fixed_operand
(
&
ops
[
1
],
rtx_v0
);
create_fixed_operand
(
&
ops
[
2
],
rtx_v0
);
for
(
j
=
0
;
j
<
u
;
++
j
)
vec
[
i
*
e
+
j
]
=
GEN_INT
(
this_e
+
j
);
}
sel_rtx
=
gen_rtx_CONST_VECTOR
(
qimode
,
gen_rtvec_v
(
w
,
vec
));
}
else
{
create_input_operand
(
&
ops
[
1
],
expand_normal
(
v0
),
mode
);
create_input_operand
(
&
ops
[
2
],
expand_normal
(
v1
),
mode
);
}
/* Multiply each element by its byte size. */
if
(
u
==
2
)
sel_rtx
=
expand_simple_binop
(
mode
,
PLUS
,
sel_rtx
,
sel_rtx
,
sel_rtx
,
0
,
OPTAB_DIRECT
);
else
sel_rtx
=
expand_simple_binop
(
mode
,
ASHIFT
,
sel_rtx
,
GEN_INT
(
exact_log2
(
u
)),
sel_rtx
,
0
,
OPTAB_DIRECT
);
gcc_assert
(
sel_rtx
);
expand_insn
(
icode
,
4
,
ops
);
return
ops
[
0
].
value
;
/* Broadcast the low byte of each element into each of its bytes.  */
for
(
i
=
0
;
i
<
w
;
++
i
)
{
int
this_e
=
i
/
u
*
u
;
if
(
BYTES_BIG_ENDIAN
)
this_e
+=
u
-
1
;
vec
[
i
]
=
GEN_INT
(
this_e
);
}
vt
=
gen_rtx_CONST_VECTOR
(
qimode
,
gen_rtvec_v
(
w
,
vec
));
sel_rtx
=
gen_lowpart
(
qimode
,
sel_rtx
);
sel_rtx
=
expand_vec_perm_expr_1
(
icode
,
gen_reg_rtx
(
qimode
),
sel_rtx
,
sel_rtx
,
vt
);
gcc_assert
(
sel_rtx
!=
NULL
);
/* Add the byte offset to each byte element. */
/* Note that the definition of the indices here is memory ordering,
so there should be no difference between big and little endian.  */
for
(
i
=
0
;
i
<
w
;
++
i
)
vec
[
i
]
=
GEN_INT
(
i
%
u
);
vt
=
gen_rtx_CONST_VECTOR
(
qimode
,
gen_rtvec_v
(
w
,
vec
));
sel_rtx
=
expand_simple_binop
(
qimode
,
PLUS
,
sel_rtx
,
vt
,
NULL_RTX
,
0
,
OPTAB_DIRECT
);
gcc_assert
(
sel_rtx
);
}
tmp
=
expand_vec_perm_expr_1
(
icode
,
gen_lowpart
(
qimode
,
target
),
gen_lowpart
(
qimode
,
v0_rtx
),
gen_lowpart
(
qimode
,
v1_rtx
),
sel_rtx
);
gcc_assert
(
tmp
!=
NULL
);
return
gen_lowpart
(
mode
,
tmp
);
}
...
...
gcc/optabs.h
View file @
d7943c8b
...
...
@@ -638,9 +638,6 @@ enum direct_optab_index
DOI_reload_in
,
DOI_reload_out
,
/* Vector shuffling. */
DOI_vec_perm
,
/* Block move operation. */
DOI_movmem
,
...
...
@@ -688,6 +685,10 @@ enum direct_optab_index
/* Atomic clear with release semantics. */
DOI_sync_lock_release
,
/* Vector permutation. */
DOI_vec_perm
,
DOI_vec_perm_const
,
DOI_MAX
};
...
...
@@ -704,7 +705,6 @@ typedef struct direct_optab_d *direct_optab;
#endif
#define reload_in_optab (&direct_optab_table[(int) DOI_reload_in])
#define reload_out_optab (&direct_optab_table[(int) DOI_reload_out])
#define vec_perm_optab (&direct_optab_table[(int) DOI_vec_perm])
#define movmem_optab (&direct_optab_table[(int) DOI_movmem])
#define setmem_optab (&direct_optab_table[(int) DOI_setmem])
#define cmpstr_optab (&direct_optab_table[(int) DOI_cmpstr])
...
...
@@ -734,6 +734,8 @@ typedef struct direct_optab_d *direct_optab;
(&direct_optab_table[(int) DOI_sync_lock_test_and_set])
#define sync_lock_release_optab \
(&direct_optab_table[(int) DOI_sync_lock_release])
#define vec_perm_optab (&direct_optab_table[DOI_vec_perm])
#define vec_perm_const_optab (&direct_optab_table[(int) DOI_vec_perm_const])
/* Target-dependent globals. */
struct
target_optabs
{
...
...
@@ -889,7 +891,7 @@ extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
extern
rtx
expand_vec_shift_expr
(
sepops
,
rtx
);
/* Return tree if target supports vector operations for VEC_PERM_EXPR. */
bool
expand_vec_perm_expr_p
(
enum
machine_mode
,
tree
,
tree
,
tree
);
extern
bool
can_vec_perm_expr_p
(
tree
,
tree
);
/* Generate code for VEC_PERM_EXPR. */
extern
rtx
expand_vec_perm_expr
(
tree
,
tree
,
tree
,
tree
,
rtx
);
...
...
gcc/tree-vect-generic.c
View file @
d7943c8b
...
...
@@ -641,7 +641,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
location_t
loc
=
gimple_location
(
gsi_stmt
(
*
gsi
));
unsigned
i
;
if
(
expand_vec_perm_expr_p
(
TYPE_MODE
(
vect_type
),
vec0
,
vec1
,
mask
))
if
(
can_vec_perm_expr_p
(
vect_type
,
mask
))
return
;
warning_at
(
loc
,
OPT_Wvector_operation_performance
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment