Commit 3278804e, authored and committed by Andreas Krebbel

S/390: arch13: vec_reve element order reversal builtins

gcc/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

	* config/s390/s390-builtin-types.def: Add new builtin function type.
	* config/s390/s390-builtins.def: Add overloaded builtin
	s390_vec_reve and low-level builtins for s390_vler and s390_vster.
	* config/s390/s390.md (UNSPEC_VEC_ELTSWAP): New constant definition.
	* config/s390/vecintrin.h (vec_reve): New builtin name definition.
	* config/s390/vx-builtins.md (V_HW_HSD): New mode iterator.
	("eltswap<mode>"): New expander.
	("*eltswapv16qi", "*eltswap<mode>", "*eltswap<mode>_emu"): New
	insn definitions.

gcc/testsuite/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

	* gcc.target/s390/zvector/vec-reve-load-byte-z14.c: New test.
	* gcc.target/s390/zvector/vec-reve-load-byte.c: New test.
	* gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: New test.
	* gcc.target/s390/zvector/vec-reve-load-halfword.c: New test.
	* gcc.target/s390/zvector/vec-reve-store-byte-z14.c: New test.
	* gcc.target/s390/zvector/vec-reve-store-byte.c: New test.

From-SVN: r270085
parent 1b3bbaf6
2019-04-02 Andreas Krebbel <krebbel@linux.ibm.com> 2019-04-02 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390-builtin-types.def: Add new builtin function type.
* config/s390/s390-builtins.def: Add overloaded builtin
s390_vec_reve and low-level builtins for s390_vler and s390_vster.
* config/s390/s390.md (UNSPEC_VEC_ELTSWAP): New constant definition.
* config/s390/vecintrin.h (vec_reve): New builtin name definition.
* config/s390/vx-builtins.md (V_HW_HSD): New mode iterator.
("eltswap<mode>"): New expander.
("*eltswapv16qi", "*eltswap<mode>", "*eltswap<mode>_emu"): New
insn definitions.
2019-04-02 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390-builtin-types.def: Add new builtin function types. * config/s390/s390-builtin-types.def: Add new builtin function types.
* config/s390/s390-builtins.def: Add overloaded builtin * config/s390/s390-builtins.def: Add overloaded builtin
s390_vec_revb. Add low-level builtins for vlbr and vstbr s390_vec_revb. Add low-level builtins for vlbr and vstbr
......
...@@ -382,6 +382,7 @@ DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI, ...@@ -382,6 +382,7 @@ DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI,
DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI, BT_BV16QI, BT_V16QI, BT_V16QI) DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI, BT_BV16QI, BT_V16QI, BT_V16QI)
DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI_INTPTR, BT_BV16QI, BT_V16QI, BT_V16QI, BT_INTPTR) DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI_INTPTR, BT_BV16QI, BT_V16QI, BT_V16QI, BT_INTPTR)
DEF_OV_TYPE (BT_OV_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI)
DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI)
DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI)
DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_INT, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_INT) DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_INT, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_INT)
......
...@@ -2892,3 +2892,34 @@ B_DEF (s390_vstbrg, bswapv2di, 0, ...@@ -2892,3 +2892,34 @@ B_DEF (s390_vstbrg, bswapv2di, 0,
B_DEF (s390_vstbrq, bswapv1ti, 0, B_VX, 0, BT_FN_V1TI_V1TI) B_DEF (s390_vstbrq, bswapv1ti, 0, B_VX, 0, BT_FN_V1TI_V1TI)
B_DEF (s390_vstbrf_flt, bswapv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF) B_DEF (s390_vstbrf_flt, bswapv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF)
B_DEF (s390_vstbrg_dbl, bswapv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF) B_DEF (s390_vstbrg_dbl, bswapv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF)
/* Overloaded builtin s390_vec_reve: returns a vector with the elements
   of the input vector OP0 in reversed order.  The OB_DEF entry names
   s390_vec_reve_b8 and s390_vec_reve_dbl, which are the first and the
   last OB_DEF_VAR entry listed below (presumably delimiting the range
   of variants; confirm against the OB_DEF macro definition).  There is
   one variant per element type: bool/signed/unsigned with 8, 16, 32
   and 64 bit elements plus float and double.  Each variant names the
   low-level s390_vler* builtin it maps to.  The float variant is the
   only one carrying the B_VXE flag.  */
OB_DEF (s390_vec_reve, s390_vec_reve_b8, s390_vec_reve_dbl, B_VX, BT_FN_OV4SI_OV4SI)
OB_DEF_VAR (s390_vec_reve_b8, s390_vlerb, 0, 0, BT_OV_BV16QI_BV16QI)
OB_DEF_VAR (s390_vec_reve_s8, s390_vlerb, 0, 0, BT_OV_V16QI_V16QI)
OB_DEF_VAR (s390_vec_reve_u8, s390_vlerb, 0, 0, BT_OV_UV16QI_UV16QI)
OB_DEF_VAR (s390_vec_reve_b16, s390_vlerh, 0, 0, BT_OV_BV8HI_BV8HI)
OB_DEF_VAR (s390_vec_reve_s16, s390_vlerh, 0, 0, BT_OV_V8HI_V8HI)
OB_DEF_VAR (s390_vec_reve_u16, s390_vlerh, 0, 0, BT_OV_UV8HI_UV8HI)
OB_DEF_VAR (s390_vec_reve_b32, s390_vlerf, 0, 0, BT_OV_BV4SI_BV4SI)
OB_DEF_VAR (s390_vec_reve_s32, s390_vlerf, 0, 0, BT_OV_V4SI_V4SI)
OB_DEF_VAR (s390_vec_reve_u32, s390_vlerf, 0, 0, BT_OV_UV4SI_UV4SI)
OB_DEF_VAR (s390_vec_reve_b64, s390_vlerg, 0, 0, BT_OV_BV2DI_BV2DI)
OB_DEF_VAR (s390_vec_reve_s64, s390_vlerg, 0, 0, BT_OV_V2DI_V2DI)
OB_DEF_VAR (s390_vec_reve_u64, s390_vlerg, 0, 0, BT_OV_UV2DI_UV2DI)
OB_DEF_VAR (s390_vec_reve_flt, s390_vlerf_flt, 0, B_VXE, BT_OV_V4SF_V4SF)
OB_DEF_VAR (s390_vec_reve_dbl, s390_vlerg_dbl, 0, 0, BT_OV_V2DF_V2DF)
/* Low-level element reversal builtins.  For every element size the
   load flavour (s390_vler*) and the store flavour (s390_vster*) name
   the same eltswap<mode> pattern and the same function type, so both
   take a vector and return the element-reversed vector.  The V4SF
   variants are the only ones flagged B_VXE instead of B_VX.  */
B_DEF (s390_vlerb, eltswapv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI)
B_DEF (s390_vlerh, eltswapv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI)
B_DEF (s390_vlerf, eltswapv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI)
B_DEF (s390_vlerg, eltswapv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI)
B_DEF (s390_vlerf_flt, eltswapv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF)
B_DEF (s390_vlerg_dbl, eltswapv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF)
B_DEF (s390_vsterb, eltswapv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI)
B_DEF (s390_vsterh, eltswapv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI)
B_DEF (s390_vsterf, eltswapv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI)
B_DEF (s390_vsterg, eltswapv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI)
B_DEF (s390_vsterf_flt, eltswapv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF)
B_DEF (s390_vsterg_dbl, eltswapv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF)
...@@ -238,6 +238,8 @@ ...@@ -238,6 +238,8 @@
UNSPEC_VEC_VFMIN UNSPEC_VEC_VFMIN
UNSPEC_VEC_VFMAX UNSPEC_VEC_VFMAX
UNSPEC_VEC_ELTSWAP
]) ])
;; ;;
......
...@@ -312,4 +312,5 @@ __lcbb(const void *ptr, int bndry) ...@@ -312,4 +312,5 @@ __lcbb(const void *ptr, int bndry)
#define vec_sqrt __builtin_s390_vec_sqrt #define vec_sqrt __builtin_s390_vec_sqrt
#define vec_fp_test_data_class __builtin_s390_vec_fp_test_data_class #define vec_fp_test_data_class __builtin_s390_vec_fp_test_data_class
#define vec_revb __builtin_s390_vec_revb #define vec_revb __builtin_s390_vec_revb
#define vec_reve __builtin_s390_vec_reve
#endif /* _VECINTRIN_H */ #endif /* _VECINTRIN_H */
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
(define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF (V4SF "TARGET_VXE")]) (define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF (V4SF "TARGET_VXE")])
(define_mode_iterator VI_HW_SD [V4SI V2DI]) (define_mode_iterator VI_HW_SD [V4SI V2DI])
(define_mode_iterator V_HW_HSD [V8HI V4SI V2DI V2DF]) (define_mode_iterator V_HW_HSD [V8HI V4SI (V4SF "TARGET_VXE") V2DI V2DF])
(define_mode_iterator V_HW_4 [V4SI V4SF]) (define_mode_iterator V_HW_4 [V4SI V4SF])
; Full size vector modes with more than one element which are directly supported in vector registers by the hardware. ; Full size vector modes with more than one element which are directly supported in vector registers by the hardware.
(define_mode_iterator VEC_HW [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")]) (define_mode_iterator VEC_HW [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")])
...@@ -2058,3 +2058,84 @@ ...@@ -2058,3 +2058,84 @@
"TARGET_VXE" "TARGET_VXE"
"<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3" "<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
[(set_attr "op_type" "VRR")]) [(set_attr "op_type" "VRR")])
; The element reversal builtins introduced with arch13 have been made
; available also for older CPUs down to z13.
; This expander provides the eltswap<mode> names for all VEC_HW modes.
; It has no preparation code: it only emits a set of operand 0 from an
; UNSPEC_VEC_ELTSWAP of operand 1 and is enabled whenever TARGET_VX
; holds.  Both operands may be registers or memory.
(define_expand "eltswap<mode>"
[(set (match_operand:VEC_HW 0 "nonimmediate_operand" "")
(unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "")]
UNSPEC_VEC_ELTSWAP))]
"TARGET_VX")
; The byte element reversal is implemented as 128 bit byte swap.
; Alternatively this could be emitted as bswap:V1TI but the required
; subregs appear to confuse combine.
; Only enabled for TARGET_VXE2.  The three alternatives are:
;   v <- v : template "#", i.e. no instruction is printed for the
;            register to register case (it has to be split; presumably
;            by the *eltswap<mode>_emu pattern - confirm)
;   v <- R : vlbrq, load from memory into a vector register
;   R <- v : vstbrq, store a vector register to memory; note that the
;            template prints the register source (%v1) first
(define_insn "*eltswapv16qi"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,v,R")
(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "v,R,v")]
UNSPEC_VEC_ELTSWAP))]
"TARGET_VXE2"
"@
#
vlbrq\t%v0,%v1
vstbrq\t%v1,%v0"
[(set_attr "op_type" "*,VRX,VRX")])
; vlerh, vlerf, vlerg, vsterh, vsterf, vsterg
; Element reversal for the V_HW_HSD modes, only enabled for
; TARGET_VXE2.  Same alternative layout as *eltswapv16qi:
;   v <- v : template "#", no instruction is printed for register to
;            register (it has to be split)
;   v <- R : vler<bhfgq>, element-reversing load from memory
;   R <- v : vster<bhfgq>, element-reversing store; the template prints
;            the register source (%v1) first
; <bhfgq> is presumably the mode attribute yielding the element size
; letter of the mnemonic - confirm against its definition.
(define_insn "*eltswap<mode>"
[(set (match_operand:V_HW_HSD 0 "nonimmediate_operand" "=v,v,R")
(unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "nonimmediate_operand" "v,R,v")]
UNSPEC_VEC_ELTSWAP))]
"TARGET_VXE2"
"@
#
vler<bhfgq>\t%v0,%v1
vster<bhfgq>\t%v1,%v0"
[(set_attr "op_type" "*,VRX,VRX")])
; arch13 has instructions for doing element reversal from mem to reg
; or the other way around. For reg to reg or on pre arch13 machines
; we have to emulate it with vector permute.
; The insn itself never prints an instruction (template "#") and is
; only valid while new pseudo registers can still be created, because
; the split below allocates two of them.
(define_insn_and_split "*eltswap<mode>_emu"
[(set (match_operand:VEC_HW 0 "nonimmediate_operand" "=vR")
(unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "vR")]
UNSPEC_VEC_ELTSWAP))]
"TARGET_VX && can_create_pseudo_p ()"
"#"
; Split condition: emulate only if neither operand is in memory, or if
; the TARGET_VXE2 load/store instructions are not available at all.
"&& ((!memory_operand (operands[0], <MODE>mode)
&& !memory_operand (operands[1], <MODE>mode))
|| !TARGET_VXE2)"
; Replacement sequence: permute the V16QI view of the source (operand 4,
; used for both data inputs) with the byte selector in operand 2 into
; the temporary operand 3, then copy operand 3 to the destination
; through a subreg of the original mode.
[(set (match_dup 3)
(unspec:V16QI [(match_dup 4)
(match_dup 4)
(match_dup 2)]
UNSPEC_VEC_PERM))
(set (match_dup 0) (subreg:VEC_HW (match_dup 3) 0))]
{
/* Byte selectors for the permute, one row per element size (1, 2, 4
   and 8 bytes).  Each row lists the source byte index for every result
   byte such that the elements appear in reversed order while the bytes
   inside an element keep their order.  */
static char p[4][16] =
{ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }, /* Q */
{ 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 }, /* H */
{ 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 }, /* S */
{ 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 } }; /* D */
char *perm;
rtx perm_rtx[16], constv;
/* Pick the selector row matching the element size in bytes.  */
switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
{
case 1: perm = p[0]; break;
case 2: perm = p[1]; break;
case 4: perm = p[2]; break;
case 8: perm = p[3]; break;
default: gcc_unreachable ();
}
/* Turn the selected row into 16 integer constants.  */
for (int i = 0; i < 16; i++)
perm_rtx[i] = GEN_INT (perm[i]);
/* The source has to be in a register for the permute.  Operands 2 and
   3 are fresh V16QI pseudos for the selector and the permute result;
   operand 4 is the source viewed as V16QI.  */
operands[1] = force_reg (<MODE>mode, operands[1]);
operands[2] = gen_reg_rtx (V16QImode);
operands[3] = gen_reg_rtx (V16QImode);
operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0);
/* Build the selector as a V16QI constant vector, force it into memory
   and load it into operand 2 ahead of the permute.  */
constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)));
emit_move_insn (operands[2], constv);
})
2019-04-02 Andreas Krebbel <krebbel@linux.ibm.com> 2019-04-02 Andreas Krebbel <krebbel@linux.ibm.com>
* gcc.target/s390/zvector/vec-reve-load-byte-z14.c: New test.
* gcc.target/s390/zvector/vec-reve-load-byte.c: New test.
* gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: New test.
* gcc.target/s390/zvector/vec-reve-load-halfword.c: New test.
* gcc.target/s390/zvector/vec-reve-store-byte-z14.c: New test.
* gcc.target/s390/zvector/vec-reve-store-byte.c: New test.
2019-04-02 Andreas Krebbel <krebbel@linux.ibm.com>
* gcc.target/s390/zvector/vec-revb-load-double-z14.c: New test. * gcc.target/s390/zvector/vec-revb-load-double-z14.c: New test.
* gcc.target/s390/zvector/vec-revb-load-double.c: New test. * gcc.target/s390/zvector/vec-revb-load-double.c: New test.
* gcc.target/s390/zvector/vec-revb-store-double-z14.c: New test. * gcc.target/s390/zvector/vec-revb-store-double-z14.c: New test.
......
/* { dg-do compile } */
/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
/* Byte element reversal of a value coming from a register or from
   memory, compiled for z14.  The scan at the end expects three vperm
   instructions in total for the three functions below, i.e. the
   reversal is done by vector permute at this CPU level.  */
#include <vecintrin.h>
/* Reverse a vector passed by value.  */
vector signed char
test (vector signed char x)
{
return vec_reve (x);
}
/* Reverse a vector read through a vector pointer.  */
vector signed char
test2 (vector signed char *x)
{
return vec_reve (*x);
}
/* Reverse a vector loaded with vec_xl from an element pointer.  */
vector signed char
test3 (signed char *x)
{
return vec_reve (vec_xl (0, x));
}
/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
/* { dg-do compile } */
/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
/* The vector byte element reversal is actually implemented with a 128
bit bswap. */
/* Compiled for arch13: the scans expect a single vperm (for the value
   that is passed in directly) and two vlbrq instructions (for the two
   functions reading the vector from memory).  */
#include <vecintrin.h>
/* Reverse a vector passed by value.  */
vector signed char
test (vector signed char x)
{
return vec_reve (x);
}
/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
/* Reverse a vector read through a vector pointer.  */
vector signed char
test2 (vector signed char *x)
{
return vec_reve (*x);
}
/* Reverse a vector loaded with vec_xl from an element pointer.  */
vector signed char
test3 (signed char *x)
{
return vec_reve (vec_xl (0, x));
}
/* { dg-final { scan-assembler-times "vlbrq\t" 2 } } */
/* { dg-do compile } */
/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
/* Halfword element reversal of a value coming from a register or from
   memory, compiled for z14.  The scan at the end expects three vperm
   instructions in total for the three functions below.  */
#include <vecintrin.h>
/* Reverse a vector passed by value.  */
vector signed short
foo (vector signed short x)
{
return vec_reve (x);
}
/* Reverse a vector read through a vector pointer.  */
vector signed short
bar (vector signed short *x)
{
return vec_reve (*x);
}
/* Reverse a vector loaded with vec_xl from an element pointer.  */
vector signed short
baz (signed short *x)
{
return vec_reve (vec_xl (0, x));
}
/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
/* { dg-do compile } */
/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
/* Halfword element reversal compiled for arch13: the scans expect a
   single vperm (for the value that is passed in directly) and two
   vlerh instructions (for the two functions reading the vector from
   memory).  */
#include <vecintrin.h>
/* Reverse a vector passed by value.  */
vector signed short
foo (vector signed short x)
{
return vec_reve (x);
}
/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
/* Reverse a vector read through a vector pointer.  */
vector signed short
bar (vector signed short *x)
{
return vec_reve (*x);
}
/* Reverse a vector loaded with vec_xl from an element pointer.  */
vector signed short
baz (signed short *x)
{
return vec_reve (vec_xl (0, x));
}
/* { dg-final { scan-assembler-times "vlerh\t" 2 } } */
/* { dg-do compile } */
/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
/* Byte element reversal whose result is stored to memory, compiled for
   z14.  The scan at the end expects three vperm instructions in total
   for the three functions below.  */
#include <vecintrin.h>
/* reg -> mem */
/* Store the reversed vector through a vector pointer.  */
void
foo (vector signed char *target, vector signed char x)
{
*target = vec_reve (x);
}
/* Store the reversed vector with vec_xst through an element pointer.  */
void
bar (signed char *target, vector signed char x)
{
vec_xst (vec_reve (x), 0, target);
}
/* mem -> mem */
/* Both the source and the destination are accessed through pointers.  */
void
baz (vector signed char *target, vector signed char *x)
{
*target = vec_reve (*x);
}
/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
/* { dg-do compile } */
/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
/* Byte element reversal whose result is stored to memory, compiled for
   arch13.  The scans expect two vstbrq instructions for the two
   register to memory functions and one vlbrq for the memory to memory
   function.  */
#include <vecintrin.h>
/* reg -> mem */
/* Store the reversed vector through a vector pointer.  */
void
foo (vector signed char *target, vector signed char x)
{
*target = vec_reve (x);
}
/* Store the reversed vector with vec_xst through an element pointer.  */
void
bar (signed char *target, vector signed char x)
{
vec_xst (vec_reve (x), 0, target);
}
/* { dg-final { scan-assembler-times "vstbrq\t" 2 } } */
/* mem -> mem: This becomes vlbrq + vst */
void
baz (vector signed char *target, vector signed char *x)
{
*target = vec_reve (*x);
}
/* { dg-final { scan-assembler-times "vlbrq\t" 1 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment