Commit d10cff95 by Carl Love Committed by Carl Love

rs6000-builtin.def (ST_ELEMREV_V1TI, [...]): Add macro expansion.

gcc/ChangeLog:

2018-01-22 Carl Love <cel@us.ibm.com>

	* config/rs6000/rs6000-builtin.def (ST_ELEMREV_V1TI, LD_ELEMREV_V1TI,
	LVX_V1TI): Add macro expansion.
	* config/rs6000/rs6000-c.c (altivec_builtin_types): Add argument
	definitions for VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_VEC_ST,
	VSX_BUILTIN_VEC_XL, LD_ELEMREV_V1TI builtins.
	* config/rs6000/rs6000-p8swap.c (insn_is_swappable_p):
	Change check to determine if the instruction is a byte reversing
	entry.  Fix typo in comment.
	* config/rs6000/rs6000.c (altivec_expand_builtin): Add case entry
	for VSX_BUILTIN_ST_ELEMREV_V1TI and VSX_BUILTIN_LD_ELEMREV_V1TI.
	Add def_builtin calls for new builtins.
	* config/rs6000/vsx.md (vsx_st_elemrev_v1ti, vsx_ld_elemrev_v1ti):
	Add define_insn expansion.

gcc/testsuite/ChangeLog:

2018-01-22  Carl Love  <cel@us.ibm.com>
	* gcc.target/powerpc/powerpc.exp: Add torture tests for
	builtins-4-runnable.c, builtins-6-runnable.c,
	builtins-5-p9-runnable.c, builtins-6-p9-runnable.c.
	* gcc.target/powerpc/builtins-6-runnable.c: New test file.
	* gcc.target/powerpc/builtins-4-runnable.c: Add additional tests
	for signed/unsigned 128-bit and long long int loads.

From-SVN: r256952
parent f25d7e06
2018-01-22 Carl Love <cel@us.ibm.com>
* config/rs6000/rs6000-builtin.def (ST_ELEMREV_V1TI, LD_ELEMREV_V1TI,
LVX_V1TI): Add macro expansion.
* config/rs6000/rs6000-c.c (altivec_builtin_types): Add argument
definitions for VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_VEC_ST,
VSX_BUILTIN_VEC_XL, LD_ELEMREV_V1TI builtins.
* config/rs6000/rs6000-p8swap.c (insn_is_swappable_p):
Change check to determine if the instruction is a byte reversing
entry. Fix typo in comment.
* config/rs6000/rs6000.c (altivec_expand_builtin): Add case entry
for VSX_BUILTIN_ST_ELEMREV_V1TI and VSX_BUILTIN_LD_ELEMREV_V1TI.
Add def_builtin calls for new builtins.
* config/rs6000/vsx.md (vsx_st_elemrev_v1ti, vsx_ld_elemrev_v1ti):
Add define_insn expansion.
2018-01-22 Sebastian Perta <sebastian.perta@renesas.com>
* config/rl78/rl78.md: New define_expand "umaxdi3".
......
......@@ -1242,6 +1242,7 @@ BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi", MEM)
BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", MEM)
BU_ALTIVEC_X (LVX, "lvx", MEM)
BU_ALTIVEC_X (LVX_V2DF, "lvx_v2df", MEM)
BU_ALTIVEC_X (LVX_V1TI, "lvx_v1ti", MEM)
BU_ALTIVEC_X (LVX_V2DI, "lvx_v2di", MEM)
BU_ALTIVEC_X (LVX_V4SF, "lvx_v4sf", MEM)
BU_ALTIVEC_X (LVX_V4SI, "lvx_v4si", MEM)
......@@ -1792,12 +1793,14 @@ BU_VSX_X (STXVW4X_V4SF, "stxvw4x_v4sf", MEM)
BU_VSX_X (STXVW4X_V4SI, "stxvw4x_v4si", MEM)
BU_VSX_X (STXVW4X_V8HI, "stxvw4x_v8hi", MEM)
BU_VSX_X (STXVW4X_V16QI, "stxvw4x_v16qi", MEM)
BU_VSX_X (LD_ELEMREV_V1TI, "ld_elemrev_v1ti", MEM)
BU_VSX_X (LD_ELEMREV_V2DF, "ld_elemrev_v2df", MEM)
BU_VSX_X (LD_ELEMREV_V2DI, "ld_elemrev_v2di", MEM)
BU_VSX_X (LD_ELEMREV_V4SF, "ld_elemrev_v4sf", MEM)
BU_VSX_X (LD_ELEMREV_V4SI, "ld_elemrev_v4si", MEM)
BU_VSX_X (LD_ELEMREV_V8HI, "ld_elemrev_v8hi", MEM)
BU_VSX_X (LD_ELEMREV_V16QI, "ld_elemrev_v16qi", MEM)
BU_VSX_X (ST_ELEMREV_V1TI, "st_elemrev_v1ti", MEM)
BU_VSX_X (ST_ELEMREV_V2DF, "st_elemrev_v2df", MEM)
BU_VSX_X (ST_ELEMREV_V2DI, "st_elemrev_v2di", MEM)
BU_VSX_X (ST_ELEMREV_V4SF, "st_elemrev_v4sf", MEM)
......
......@@ -3162,16 +3162,27 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V1TI,
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTTI, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V1TI,
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_V1TI, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V1TI,
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTTI, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_long_long, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
{ VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF,
......@@ -3206,6 +3217,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
{ VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
{ VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V1TI,
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTTI, 0 },
{ VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V1TI,
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTTI, 0 },
{ VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
{ VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI,
......@@ -4089,6 +4104,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
{ VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
{ VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V1TI,
RS6000_BTI_void, RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTTI },
{ VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V1TI,
RS6000_BTI_void, RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTTI },
{ VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
{ VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI,
......@@ -4190,9 +4209,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTTI, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTTI, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
......@@ -4244,6 +4273,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTDI,
~RS6000_BTI_long_long },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTDI,
~RS6000_BTI_unsigned_long_long },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V1TI,
RS6000_BTI_void, RS6000_BTI_V1TI, RS6000_BTI_INTDI, ~RS6000_BTI_INTTI },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V1TI,
RS6000_BTI_void, RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTDI, ~RS6000_BTI_UINTTI },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
......
......@@ -873,10 +873,11 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
if (insn_entry[i].is_store)
{
if (GET_CODE (body) == SET
&& GET_CODE (SET_SRC (body)) != UNSPEC)
&& GET_CODE (SET_SRC (body)) != UNSPEC
&& GET_CODE (SET_SRC (body)) != VEC_SELECT)
{
rtx lhs = SET_DEST (body);
/* Even without a swap, the LHS might be a vec_select for, say,
/* Even without a swap, the RHS might be a vec_select for, say,
a byte-reversing store. */
if (GET_CODE (lhs) != MEM)
return 0;
......
......@@ -15582,6 +15582,12 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
unaligned-supporting store, so use a generic expander. For
little-endian, the exact element-reversing instruction must
be used. */
case VSX_BUILTIN_ST_ELEMREV_V1TI:
{
enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
: CODE_FOR_vsx_st_elemrev_v1ti);
return altivec_expand_stv_builtin (code, exp);
}
case VSX_BUILTIN_ST_ELEMREV_V2DF:
{
enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
......@@ -15856,6 +15862,12 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
: CODE_FOR_vsx_ld_elemrev_v2df);
return altivec_expand_lv_builtin (code, exp, target, false);
}
case VSX_BUILTIN_LD_ELEMREV_V1TI:
{
enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
: CODE_FOR_vsx_ld_elemrev_v1ti);
return altivec_expand_lv_builtin (code, exp, target, false);
}
case VSX_BUILTIN_LD_ELEMREV_V2DI:
{
enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
......@@ -17457,6 +17469,10 @@ altivec_init_builtins (void)
= build_function_type_list (void_type_node,
V2DF_type_node, long_integer_type_node,
pvoid_type_node, NULL_TREE);
tree void_ftype_v1ti_long_pvoid
= build_function_type_list (void_type_node,
V1TI_type_node, long_integer_type_node,
pvoid_type_node, NULL_TREE);
tree void_ftype_v2di_long_pvoid
= build_function_type_list (void_type_node,
V2DI_type_node, long_integer_type_node,
......@@ -17612,6 +17628,8 @@ altivec_init_builtins (void)
VSX_BUILTIN_LD_ELEMREV_V16QI);
def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
VSX_BUILTIN_ST_ELEMREV_V2DF);
def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
VSX_BUILTIN_ST_ELEMREV_V1TI);
def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
VSX_BUILTIN_ST_ELEMREV_V2DI);
def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
......@@ -17935,6 +17953,8 @@ altivec_init_builtins (void)
= build_function_type_list (void_type_node,
V1TI_type_node, long_integer_type_node,
pvoid_type_node, NULL_TREE);
def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
VSX_BUILTIN_LD_ELEMREV_V1TI);
def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
VSX_BUILTIN_LXVD2X_V1TI);
def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
......@@ -1302,6 +1302,17 @@
"lxvd2x %x0,%y1"
[(set_attr "type" "vecload")])
;; Load a V1TI vector from memory (operand 1) into a VSX register
;; (operand 0), modelled as a one-element vec_select of the memory operand.
;; The pattern is guarded by VECTOR_MEM_VSX_P (V1TImode) and is only
;; available when the target is not big-endian.
;; It emits two instructions: lxvd2x performs the load, then xxpermdi with
;; selector 2 is applied to operand 0 with itself as both sources, which
;; exchanges the two doublewords of the freshly loaded register.
(define_insn "vsx_ld_elemrev_v1ti"
[(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
(vec_select:V1TI
(match_operand:V1TI 1 "memory_operand" "Z")
(parallel [(const_int 0)])))]
"VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
[(set_attr "type" "vecload")])
(define_insn "vsx_ld_elemrev_v2df"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
(vec_select:V2DF
......@@ -1431,6 +1442,18 @@
"lxvb16x %x0,%y1"
[(set_attr "type" "vecload")])
;; Store a V1TI vector from a VSX register (operand 1) to memory
;; (operand 0), modelled as a one-element vec_select of the register.
;; The emitted sequence first runs xxpermdi with selector 2 on operand 1
;; in place (exchanging its two doublewords) and then stores it with
;; stxvd2x.  Because the source register is overwritten by the xxpermdi,
;; operand 1 uses the read-write "+wa" constraint and is also listed in an
;; explicit clobber.
;; Only available when the target is not big-endian.
;; NOTE(review): the condition tests VECTOR_MEM_VSX_P (V2DImode) although
;; the operands are V1TImode; confirm whether V1TImode was intended.
(define_insn "vsx_st_elemrev_v1ti"
[(set (match_operand:V1TI 0 "memory_operand" "=Z")
(vec_select:V1TI
(match_operand:V1TI 1 "vsx_register_operand" "+wa")
(parallel [(const_int 0)])))
(clobber (match_dup 1))]
"VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
[(set_attr "type" "vecstore")])
(define_insn "vsx_st_elemrev_v2df"
[(set (match_operand:V2DF 0 "memory_operand" "=Z")
(vec_select:V2DF
......@@ -1481,7 +1504,7 @@
{
if (!TARGET_P9_VECTOR)
{
rtx subreg, perm[16], pcv;
rtx mem_subreg, subreg, perm[16], pcv;
rtx tmp = gen_reg_rtx (V8HImode);
/* 2 is leftmost element in register */
unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
......@@ -1496,11 +1519,21 @@
emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
operands[1], pcv));
subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0]));
mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
DONE;
}
})
;; Internal (unnamed-generator) pattern matching a V2DI store whose source
;; is a vec_select of a VSX register (operand 1) with element order
;; (1, 0), written to memory (operand 0).
;; It is matched only when VECTOR_MEM_VSX_P (V2DImode) holds, the target is
;; not big-endian, and TARGET_P9_VECTOR is set; it emits a single stxvd2x.
(define_insn "*vsx_st_elemrev_v2di_internal"
[(set (match_operand:V2DI 0 "memory_operand" "=Z")
(vec_select:V2DI
(match_operand:V2DI 1 "vsx_register_operand" "wa")
(parallel [(const_int 1) (const_int 0)])))]
"VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
(define_insn "*vsx_st_elemrev_v8hi_internal"
[(set (match_operand:V8HI 0 "memory_operand" "=Z")
(vec_select:V8HI
......@@ -1529,7 +1562,7 @@
{
if (!TARGET_P9_VECTOR)
{
rtx subreg, perm[16], pcv;
rtx mem_subreg, subreg, perm[16], pcv;
rtx tmp = gen_reg_rtx (V16QImode);
/* 3 is leftmost element in register */
unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
......@@ -1544,7 +1577,8 @@
emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
operands[1], pcv));
subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0]));
mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
DONE;
}
})
......
2018-01-22 Carl Love <cel@us.ibm.com>
* gcc.target/powerpc/powerpc.exp: Add torture tests for
builtins-4-runnable.c, builtins-6-runnable.c,
builtins-5-p9-runnable.c, builtins-6-p9-runnable.c.
* gcc.target/powerpc/builtins-6-runnable.c: New test file.
* gcc.target/powerpc/builtins-4-runnable.c: Add additional tests
for signed/unsigned 128-bit and long long int loads.
2018-01-22 Marek Polacek <polacek@redhat.com>
PR c++/81933
......
......@@ -49,4 +49,16 @@ gcc-dg-runtest [list $srcdir/$subdir/savres.c] "" $alti
# All done.
torture-finish
torture-init
# Test load/store builtins at multiple optimizations
set-torture-options [list -O0 -Os -O1 -O2 -O3]
gcc-dg-runtest [list $srcdir/$subdir/builtins-4-runnable.c \
$srcdir/$subdir/builtins-6-runnable.c \
$srcdir/$subdir/builtins-5-p9-runnable.c \
$srcdir/$subdir/builtins-6-p9-runnable.c] "" $DEFAULT_CFLAGS
# All done.
torture-finish
dg-finish
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment