Commit 0fac5151 by Richard Henderson Committed by Richard Henderson

i386-builtin-types.awk (DEF_VECTOR_TYPE): Allow an optional 3rd argument to define the mode.

	* config/i386/i386-builtin-types.awk (DEF_VECTOR_TYPE): Allow an
	optional 3rd argument to define the mode.
	* config/i386/i386-builtin-types.def (UQI, UHI, USI, UDI): New.
	(V2UDI, V4USI, V8UHI, V16UQI): New.
	(V4SF_FTYPE_V4SF_V4SF_V4SI, V2UDI_FTYPE_V2UDI_V2UDI_V2UDI,
	V4USI_FTYPE_V4USI_V4USI_V4USI, V8UHI_FTYPE_V8UHI_V8UHI_V8UHI,
	V16UQI_FTYPE_V16UQI_V16UQI_V16UQI): New.
	* config/i386/i386-modes.def: Rearrange for double-wide AVX.
	* config/i386/i386-protos.h (ix86_expand_vec_extract_even_odd): New.
	* config/i386/i386.c (IX86_BUILTIN_VEC_PERM_*): New.
	(bdesc_args): Add the builtin definitions to match.
	(ix86_expand_builtin): Expand them.
	(ix86_builtin_vectorization_cost): Rename from
	x86_builtin_vectorization_cost.
	(ix86_vectorize_builtin_vec_perm, struct expand_vec_perm_d,
	doublesize_vector_mode, expand_vselect, expand_vselect_vconcat,
	expand_vec_perm_blend, expand_vec_perm_vpermil,
	expand_vec_perm_pshufb, expand_vec_perm_1,
	expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_palignr,
	expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
	expand_vec_perm_even_odd_1, expand_vec_perm_even_odd,
	ix86_expand_vec_perm_builtin_1, extract_vec_perm_cst,
	ix86_expand_vec_perm_builtin, ix86_vectorize_builtin_vec_perm_ok,
	ix86_expand_vec_extract_even_odd, TARGET_VECTORIZE_BUILTIN_VEC_PERM,
	TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK): New.
	* config/i386/sse.md (SSEMODE_EO): New.
	(vec_extract_even<mode>): Use SSEMODE_EO and
	ix86_expand_vec_extract_even_odd.
	(vec_extract_odd<mode>): Likewise.
	(mulv16qi3, vec_pack_trunc_v8hi, vec_pack_trunc_v4si,
	vec_pack_trunc_v2di): Use ix86_expand_vec_extract_even_odd.

testsuite/
	* gcc.dg/vect/slp-21.c: Succeed with vect_extract_even_odd too.

	* lib/target-supports.exp
	(check_effective_target_vect_extract_even_odd): Add x86.

	* gcc.target/i386/isa-check.h: New.
	* gcc.target/i386/vperm-2-2.inc, gcc.target/i386/vperm-4-1.inc,
	gcc.target/i386/vperm-4-2.inc, gcc.target/i386/vperm-v2df.c,
	gcc.target/i386/vperm-v2di.c, gcc.target/i386/vperm-v4sf-1.c,
	gcc.target/i386/vperm-v4sf-2.c, gcc.target/i386/vperm-v4si-1.c,
	gcc.target/i386/vperm-v4si-2.c, gcc.target/i386/vperm-v4si-2x.c,
	gcc.target/i386/vperm.pl: New files.

From-SVN: r154667
parent dac9d53a
2009-11-25 Richard Henderson <rth@redhat.com> 2009-11-25 Richard Henderson <rth@redhat.com>
* config/i386/i386-builtin-types.awk (DEF_VECTOR_TYPE): Allow an
optional 3rd argument to define the mode.
* config/i386/i386-builtin-types.def (UQI, UHI, USI, UDI): New.
(V2UDI, V4USI, V8UHI, V16UQI): New.
(V4SF_FTYPE_V4SF_V4SF_V4SI, V2UDI_FTYPE_V2UDI_V2UDI_V2UDI,
V4USI_FTYPE_V4USI_V4USI_V4USI, V8UHI_FTYPE_V8UHI_V8UHI_V8UHI,
V16UQI_FTYPE_V16UQI_V16UQI_V16UQI): New.
* config/i386/i386-modes.def: Rearrange for double-wide AVX.
* config/i386/i386-protos.h (ix86_expand_vec_extract_even_odd): New.
* config/i386/i386.c (IX86_BUILTIN_VEC_PERM_*): New.
(bdesc_args): Add the builtin definitions to match.
(ix86_expand_builtin): Expand them.
(ix86_builtin_vectorization_cost): Rename from
x86_builtin_vectorization_cost.
(ix86_vectorize_builtin_vec_perm, struct expand_vec_perm_d,
doublesize_vector_mode, expand_vselect, expand_vselect_vconcat,
expand_vec_perm_blend, expand_vec_perm_vpermil,
expand_vec_perm_pshufb, expand_vec_perm_1,
expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_palignr,
expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
expand_vec_perm_even_odd_1, expand_vec_perm_even_odd,
ix86_expand_vec_perm_builtin_1, extract_vec_perm_cst,
ix86_expand_vec_perm_builtin, ix86_vectorize_builtin_vec_perm_ok,
ix86_expand_vec_extract_even_odd, TARGET_VECTORIZE_BUILTIN_VEC_PERM,
TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK): New.
* sse.md (SSEMODE_EO): New.
(vec_extract_even<mode>): Use SSEMODE_EO and
ix86_expand_vec_extract_even_odd.
(vec_extract_odd<mode>): Likewise.
(mulv16qi3, vec_pack_trunc_v8hi, vec_pack_trunc_v4si,
vec_pack_trunc_v2di): Use ix86_expand_vec_extract_even_odd.
2009-11-25 Richard Henderson <rth@redhat.com>
* target.h (targetm.vectorize.builtin_vec_perm_ok): New. * target.h (targetm.vectorize.builtin_vec_perm_ok): New.
* target-def.h (TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK): New. * target-def.h (TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK): New.
* hooks.h, hooks.c (hook_bool_tree_tree_true): New. * hooks.h, hooks.c (hook_bool_tree_tree_true): New.
...@@ -69,11 +69,12 @@ $1 == "DEF_PRIMITIVE_TYPE" { ...@@ -69,11 +69,12 @@ $1 == "DEF_PRIMITIVE_TYPE" {
} }
$1 == "DEF_VECTOR_TYPE" { $1 == "DEF_VECTOR_TYPE" {
if (NF == 4) { if (NF == 4 || NF == 5) {
check_type($3) check_type($3)
type_hash[$2] = 1 type_hash[$2] = 1
vect_mode[vect_defs] = $2 vect_name[vect_defs] = $2
vect_base[vect_defs] = $3 vect_base[vect_defs] = $3
vect_mode[vect_defs] = (NF == 5 ? $4 : $2)
vect_defs++ vect_defs++
} else } else
do_error("DEF_VECTOR_TYPE expected 2 arguments") do_error("DEF_VECTOR_TYPE expected 2 arguments")
...@@ -152,8 +153,8 @@ END { ...@@ -152,8 +153,8 @@ END {
print " IX86_BT_" prim_name[i] "," print " IX86_BT_" prim_name[i] ","
print " IX86_BT_LAST_PRIM = IX86_BT_" prim_name[i-1] "," print " IX86_BT_LAST_PRIM = IX86_BT_" prim_name[i-1] ","
for (i = 0; i < vect_defs; ++i) for (i = 0; i < vect_defs; ++i)
print " IX86_BT_" vect_mode[i] "," print " IX86_BT_" vect_name[i] ","
print " IX86_BT_LAST_VECT = IX86_BT_" vect_mode[i-1] "," print " IX86_BT_LAST_VECT = IX86_BT_" vect_name[i-1] ","
for (i = 0; i < ptr_defs; ++i) for (i = 0; i < ptr_defs; ++i)
print " IX86_BT_" ptr_name[i] "," print " IX86_BT_" ptr_name[i] ","
print " IX86_BT_LAST_PTR = IX86_BT_" ptr_name[i-1] "," print " IX86_BT_LAST_PTR = IX86_BT_" ptr_name[i-1] ","
......
...@@ -10,12 +10,12 @@ ...@@ -10,12 +10,12 @@
# At present, that's all that's required; revisit if it turns out # At present, that's all that's required; revisit if it turns out
# that we need more than that. # that we need more than that.
# #
# DEF_VECTOR_TYPE (ENUM, TYPE) # DEF_VECTOR_TYPE (ENUM, TYPE [, MODE])
# #
# This describes a vector type. ENUM doubles as both the identifier # This describes a vector type. ENUM is an identifier as above.
# to define in the enumeration as well as the mode of the vector; TYPE is # TYPE is the enumeral for the inner type which should of course
# the enumeral for the inner type which should of course name a type of # name a type of the proper inner mode. If present, MODE is the
# the proper inner mode. # machine mode, else the machine mode should be the same as ENUM.
# #
# DEF_POINTER_TYPE (ENUM, TYPE [, CONST]) # DEF_POINTER_TYPE (ENUM, TYPE [, CONST])
# #
...@@ -40,10 +40,22 @@ ...@@ -40,10 +40,22 @@
DEF_PRIMITIVE_TYPE (VOID, void_type_node) DEF_PRIMITIVE_TYPE (VOID, void_type_node)
DEF_PRIMITIVE_TYPE (CHAR, char_type_node) DEF_PRIMITIVE_TYPE (CHAR, char_type_node)
DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node) DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (QI, intQI_type_node) # ??? Logically this should be intQI_type_node, but that maps to "signed char"
# which is a different type than "char" even if "char" is signed. This must
# match the usage in emmintrin.h and changing this would change name mangling
# and so is not advisable.
DEF_PRIMITIVE_TYPE (QI, char_type_node)
DEF_PRIMITIVE_TYPE (HI, intHI_type_node) DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
DEF_PRIMITIVE_TYPE (SI, intSI_type_node) DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
# ??? Logically this should be intDI_type_node, but that maps to "long"
# with 64-bit, and that's not how the emmintrin.h is written. Again,
# changing this would change name mangling.
DEF_PRIMITIVE_TYPE (DI, long_long_integer_type_node) DEF_PRIMITIVE_TYPE (DI, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (UQI, unsigned_intQI_type_node)
DEF_PRIMITIVE_TYPE (UHI, unsigned_intHI_type_node)
DEF_PRIMITIVE_TYPE (USI, unsigned_intSI_type_node)
DEF_PRIMITIVE_TYPE (UDI, long_long_unsigned_type_node)
# ??? Some of the types below should use the mode types above.
DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node) DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node) DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node) DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
...@@ -59,23 +71,33 @@ DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node) ...@@ -59,23 +71,33 @@ DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node)
DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node) DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node)
DEF_PRIMITIVE_TYPE (FLOAT128, float128_type_node) DEF_PRIMITIVE_TYPE (FLOAT128, float128_type_node)
DEF_VECTOR_TYPE (V16HI, HI) # MMX vectors
DEF_VECTOR_TYPE (V16QI, CHAR)
DEF_VECTOR_TYPE (V1DI, DI)
DEF_VECTOR_TYPE (V2DF, DOUBLE)
DEF_VECTOR_TYPE (V2DI, DI)
DEF_VECTOR_TYPE (V2SF, FLOAT) DEF_VECTOR_TYPE (V2SF, FLOAT)
DEF_VECTOR_TYPE (V1DI, DI)
DEF_VECTOR_TYPE (V2SI, SI) DEF_VECTOR_TYPE (V2SI, SI)
DEF_VECTOR_TYPE (V32QI, CHAR)
DEF_VECTOR_TYPE (V4DF, DOUBLE)
DEF_VECTOR_TYPE (V4DI, DI)
DEF_VECTOR_TYPE (V4HI, HI) DEF_VECTOR_TYPE (V4HI, HI)
DEF_VECTOR_TYPE (V8QI, QI)
# SSE vectors
DEF_VECTOR_TYPE (V2DF, DOUBLE)
DEF_VECTOR_TYPE (V4SF, FLOAT) DEF_VECTOR_TYPE (V4SF, FLOAT)
DEF_VECTOR_TYPE (V2DI, DI)
DEF_VECTOR_TYPE (V4SI, SI) DEF_VECTOR_TYPE (V4SI, SI)
DEF_VECTOR_TYPE (V8HI, HI) DEF_VECTOR_TYPE (V8HI, HI)
DEF_VECTOR_TYPE (V8QI, CHAR) DEF_VECTOR_TYPE (V16QI, QI)
DEF_VECTOR_TYPE (V2UDI, UDI, V2DI)
DEF_VECTOR_TYPE (V4USI, USI, V4SI)
DEF_VECTOR_TYPE (V8UHI, UHI, V8HI)
DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
# AVX vectors
DEF_VECTOR_TYPE (V4DF, DOUBLE)
DEF_VECTOR_TYPE (V8SF, FLOAT) DEF_VECTOR_TYPE (V8SF, FLOAT)
DEF_VECTOR_TYPE (V4DI, DI)
DEF_VECTOR_TYPE (V8SI, SI) DEF_VECTOR_TYPE (V8SI, SI)
DEF_VECTOR_TYPE (V16HI, HI)
DEF_VECTOR_TYPE (V32QI, QI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST) DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST) DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
...@@ -323,6 +345,12 @@ DEF_FUNCTION_TYPE (VOID, UINT64, UINT, UINT) ...@@ -323,6 +345,12 @@ DEF_FUNCTION_TYPE (VOID, UINT64, UINT, UINT)
DEF_FUNCTION_TYPE (VOID, USHORT, UINT, USHORT) DEF_FUNCTION_TYPE (VOID, USHORT, UINT, USHORT)
DEF_FUNCTION_TYPE (VOID, V16QI, V16QI, PCHAR) DEF_FUNCTION_TYPE (VOID, V16QI, V16QI, PCHAR)
DEF_FUNCTION_TYPE (VOID, V8QI, V8QI, PCHAR) DEF_FUNCTION_TYPE (VOID, V8QI, V8QI, PCHAR)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI)
DEF_FUNCTION_TYPE (V2UDI, V2UDI, V2UDI, V2UDI)
DEF_FUNCTION_TYPE (V4USI, V4USI, V4USI, V4USI)
DEF_FUNCTION_TYPE (V8UHI, V8UHI, V8UHI, V8UHI)
DEF_FUNCTION_TYPE (V16UQI, V16UQI, V16UQI, V16UQI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT) DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI) DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
......
...@@ -69,22 +69,20 @@ CC_MODE (CCZ); ...@@ -69,22 +69,20 @@ CC_MODE (CCZ);
CC_MODE (CCFP); CC_MODE (CCFP);
CC_MODE (CCFPU); CC_MODE (CCFPU);
/* Vector modes. */ /* Vector modes. Note that VEC_CONCAT patterns require vector
VECTOR_MODES (INT, 4); /* V4QI V2HI */ sizes twice as big as implemented in hardware. */
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 4); /* V4QI V2HI */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (INT, 64); /* V64QI V32HI V16SI V8DI */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODE (INT, DI, 1); /* V1DI */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODE (INT, SI, 1); /* V1SI */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF */
VECTOR_MODE (INT, DI, 8); /* V8DI */ VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, HI, 32); /* V32HI */ VECTOR_MODE (INT, SI, 1); /* V1SI */
VECTOR_MODE (INT, QI, 64); /* V64QI */ VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
INT_MODE (OI, 32); INT_MODE (OI, 32);
......
...@@ -219,6 +219,8 @@ extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); ...@@ -219,6 +219,8 @@ extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern bool ix86_fma4_valid_op_p (rtx [], rtx, int, bool, int, bool); extern bool ix86_fma4_valid_op_p (rtx [], rtx, int, bool, int, bool);
extern void ix86_expand_fma4_multiple_memory (rtx [], int, enum machine_mode); extern void ix86_expand_fma4_multiple_memory (rtx [], int, enum machine_mode);
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
/* In i386-c.c */ /* In i386-c.c */
extern void ix86_target_macros (void); extern void ix86_target_macros (void);
extern void ix86_register_pragmas (void); extern void ix86_register_pragmas (void);
...@@ -277,4 +279,3 @@ extern int asm_preferred_eh_data_format (int, int); ...@@ -277,4 +279,3 @@ extern int asm_preferred_eh_data_format (int, int);
#ifdef HAVE_ATTR_cpu #ifdef HAVE_ATTR_cpu
extern enum attr_cpu ix86_schedule; extern enum attr_cpu ix86_schedule;
#endif #endif
...@@ -71,6 +71,14 @@ ...@@ -71,6 +71,14 @@
(define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
(V2DI "TARGET_SSE4_2")]) (V2DI "TARGET_SSE4_2")])
;; Modes handled by vec_extract_even/odd pattern.
(define_mode_iterator SSEMODE_EO
[(V4SF "TARGET_SSE")
(V2DF "TARGET_SSE2")
(V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
(V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
;; Mapping from float mode to required SSE level ;; Mapping from float mode to required SSE level
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
...@@ -4693,48 +4701,24 @@ ...@@ -4693,48 +4701,24 @@
}) })
(define_expand "vec_extract_even<mode>" (define_expand "vec_extract_even<mode>"
[(set (match_operand:SSEMODE4S 0 "register_operand" "") [(match_operand:SSEMODE_EO 0 "register_operand" "")
(vec_select:SSEMODE4S (match_operand:SSEMODE_EO 1 "register_operand" "")
(vec_concat:<ssedoublesizemode> (match_operand:SSEMODE_EO 2 "register_operand" "")]
(match_operand:SSEMODE4S 1 "register_operand" "") ""
(match_operand:SSEMODE4S 2 "nonimmediate_operand" "")) {
(parallel [(const_int 0) ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
(const_int 2) DONE;
(const_int 4) })
(const_int 6)])))]
"TARGET_SSE")
(define_expand "vec_extract_odd<mode>"
[(set (match_operand:SSEMODE4S 0 "register_operand" "")
(vec_select:SSEMODE4S
(vec_concat:<ssedoublesizemode>
(match_operand:SSEMODE4S 1 "register_operand" "")
(match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
(parallel [(const_int 1)
(const_int 3)
(const_int 5)
(const_int 7)])))]
"TARGET_SSE")
(define_expand "vec_extract_even<mode>"
[(set (match_operand:SSEMODE2D 0 "register_operand" "")
(vec_select:SSEMODE2D
(vec_concat:<ssedoublesizemode>
(match_operand:SSEMODE2D 1 "register_operand" "")
(match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
(parallel [(const_int 0)
(const_int 2)])))]
"TARGET_SSE2")
(define_expand "vec_extract_odd<mode>" (define_expand "vec_extract_odd<mode>"
[(set (match_operand:SSEMODE2D 0 "register_operand" "") [(match_operand:SSEMODE_EO 0 "register_operand" "")
(vec_select:SSEMODE2D (match_operand:SSEMODE_EO 1 "register_operand" "")
(vec_concat:<ssedoublesizemode> (match_operand:SSEMODE_EO 2 "register_operand" "")]
(match_operand:SSEMODE2D 1 "register_operand" "") ""
(match_operand:SSEMODE2D 2 "nonimmediate_operand" "")) {
(parallel [(const_int 1) ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
(const_int 3)])))] DONE;
"TARGET_SSE2") })
;; punpcklqdq and punpckhqdq are shorter than shufpd. ;; punpcklqdq and punpckhqdq are shorter than shufpd.
(define_insn "*avx_punpckhqdq" (define_insn "*avx_punpckhqdq"
...@@ -5243,20 +5227,16 @@ ...@@ -5243,20 +5227,16 @@
(set_attr "prefix_data16" "1") (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")]) (set_attr "mode" "TI")])
(define_insn_and_split "mulv16qi3" (define_expand "mulv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "") [(set (match_operand:V16QI 0 "register_operand" "")
(mult:V16QI (match_operand:V16QI 1 "register_operand" "") (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
(match_operand:V16QI 2 "register_operand" "")))] (match_operand:V16QI 2 "register_operand" "")))]
"TARGET_SSE2 "TARGET_SSE2"
&& can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{ {
rtx t[12]; rtx t[6];
int i; int i;
for (i = 0; i < 12; ++i) for (i = 0; i < 6; ++i)
t[i] = gen_reg_rtx (V16QImode); t[i] = gen_reg_rtx (V16QImode);
/* Unpack data such that we've got a source byte in each low byte of /* Unpack data such that we've got a source byte in each low byte of
...@@ -5278,15 +5258,8 @@ ...@@ -5278,15 +5258,8 @@
gen_lowpart (V8HImode, t[2]), gen_lowpart (V8HImode, t[2]),
gen_lowpart (V8HImode, t[3]))); gen_lowpart (V8HImode, t[3])));
/* Extract the relevant bytes and merge them back together. */ /* Extract the even bytes and merge them back together. */
emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */ ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
emit_insn (gen_sse2_punpcklbw (operands[0], t[11], t[10])); /* ABCDEFGHIJKLMNOP */
DONE; DONE;
}) })
...@@ -6578,96 +6551,39 @@ ...@@ -6578,96 +6551,39 @@
;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Reduce:
;; op1 = abcdefghijklmnop
;; op2 = qrstuvwxyz012345
;; h1 = aqbrcsdteufvgwhx
;; l1 = iyjzk0l1m2n3o4p5
;; h2 = aiqybjrzcks0dlt1
;; l2 = emu2fnv3gow4hpx5
;; h3 = aeimquy2bfjnrvz3
;; l3 = cgkosw04dhlptx15
;; result = bdfhjlnprtvxz135
(define_expand "vec_pack_trunc_v8hi" (define_expand "vec_pack_trunc_v8hi"
[(match_operand:V16QI 0 "register_operand" "") [(match_operand:V16QI 0 "register_operand" "")
(match_operand:V8HI 1 "register_operand" "") (match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")] (match_operand:V8HI 2 "register_operand" "")]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx op1, op2, h1, l1, h2, l2, h3, l3; rtx op1 = gen_lowpart (V16QImode, operands[1]);
rtx op2 = gen_lowpart (V16QImode, operands[2]);
op1 = gen_lowpart (V16QImode, operands[1]); ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
op2 = gen_lowpart (V16QImode, operands[2]);
h1 = gen_reg_rtx (V16QImode);
l1 = gen_reg_rtx (V16QImode);
h2 = gen_reg_rtx (V16QImode);
l2 = gen_reg_rtx (V16QImode);
h3 = gen_reg_rtx (V16QImode);
l3 = gen_reg_rtx (V16QImode);
emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
DONE; DONE;
}) })
;; Reduce:
;; op1 = abcdefgh
;; op2 = ijklmnop
;; h1 = aibjckdl
;; l1 = emfngohp
;; h2 = aeimbfjn
;; l2 = cgkodhlp
;; result = bdfhjlnp
(define_expand "vec_pack_trunc_v4si" (define_expand "vec_pack_trunc_v4si"
[(match_operand:V8HI 0 "register_operand" "") [(match_operand:V8HI 0 "register_operand" "")
(match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")] (match_operand:V4SI 2 "register_operand" "")]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx op1, op2, h1, l1, h2, l2; rtx op1 = gen_lowpart (V8HImode, operands[1]);
rtx op2 = gen_lowpart (V8HImode, operands[2]);
op1 = gen_lowpart (V8HImode, operands[1]); ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
op2 = gen_lowpart (V8HImode, operands[2]);
h1 = gen_reg_rtx (V8HImode);
l1 = gen_reg_rtx (V8HImode);
h2 = gen_reg_rtx (V8HImode);
l2 = gen_reg_rtx (V8HImode);
emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
DONE; DONE;
}) })
;; Reduce:
;; op1 = abcd
;; op2 = efgh
;; h1 = aebf
;; l1 = cgdh
;; result = bdfh
(define_expand "vec_pack_trunc_v2di" (define_expand "vec_pack_trunc_v2di"
[(match_operand:V4SI 0 "register_operand" "") [(match_operand:V4SI 0 "register_operand" "")
(match_operand:V2DI 1 "register_operand" "") (match_operand:V2DI 1 "register_operand" "")
(match_operand:V2DI 2 "register_operand" "")] (match_operand:V2DI 2 "register_operand" "")]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx op1, op2, h1, l1; rtx op1 = gen_lowpart (V4SImode, operands[1]);
rtx op2 = gen_lowpart (V4SImode, operands[2]);
op1 = gen_lowpart (V4SImode, operands[1]); ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
op2 = gen_lowpart (V4SImode, operands[2]);
h1 = gen_reg_rtx (V4SImode);
l1 = gen_reg_rtx (V4SImode);
emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
DONE; DONE;
}) })
......
2009-11-25 Richard Henderson <rth@redhat.com> 2009-11-25 Richard Henderson <rth@redhat.com>
* gcc.dg/vect/slp-21.c: Succeed with vect_extract_even_odd too.
* lib/target-supports.exp
(check_effective_target_vect_extract_even_odd): Add x86.
* gcc.target/i386/isa-check.h: New.
* gcc.target/i386/vperm-2-2.inc, gcc.target/i386/vperm-4-1.inc,
gcc.target/i386/vperm-4-2.inc, gcc.target/i386/vperm-v2df.c,
gcc.target/i386/vperm-v2di.c, gcc.target/i386/vperm-v4sf-1.c,
gcc.target/i386/vperm-v4sf-2.c, gcc.target/i386/vperm-v4si-1.c,
gcc.target/i386/vperm-v4si-2.c, gcc.target/i386/vperm-v4si-2x.c,
gcc.target/i386/vperm.pl: New files.
2009-11-25 Richard Henderson <rth@redhat.com>
* gcc.dg/vect/pr36493.c: Call check_vect. * gcc.dg/vect/pr36493.c: Call check_vect.
* gcc.dg/vect/pr37539.c: Likewise. * gcc.dg/vect/pr37539.c: Likewise.
* gcc.dg/vect/vect-nest-cycle-3.c: Call check_vect earlier. * gcc.dg/vect/vect-nest-cycle-3.c: Call check_vect earlier.
......
...@@ -200,8 +200,8 @@ int main (void) ...@@ -200,8 +200,8 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_strided } } } */ /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided || vect_extract_even_odd } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided || vect_extract_even_odd } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
......
#include "cpuid.h"
extern void exit (int) __attribute__((noreturn));
/* Work out which instruction-set extensions this translation unit was
   compiled for (from the compiler's predefined macros) and confirm with
   CPUID that the processor we are running on reports all of them.

   Runs as a constructor.  If CPUID cannot be queried or a required
   feature bit is missing, the process exits with status 0, so the test
   finishes successfully without running its body.  */

static void __attribute__((constructor))
check_isa (void)
{
  /* __get_cpuid stores its results through unsigned int pointers, so the
     register copies and the masks compared against them are unsigned.  */
  unsigned int a, b, c, d;

  /* Required feature bits: c1/d1 are checked against ECX/EDX of leaf 1,
     c1e/d1e against ECX/EDX of extended leaf 0x80000001.  */
  unsigned int c1, d1, c1e, d1e;

  c1 = d1 = c1e = d1e = 0;

#ifdef __MMX__
  d1 |= bit_MMX;
#endif
#ifdef __3dNOW__
  d1e |= bit_3DNOW;
#endif
#ifdef __3dNOW_A__
  d1e |= bit_3DNOWP;
#endif
#ifdef __SSE__
  d1 |= bit_SSE;
#endif
#ifdef __SSE2__
  d1 |= bit_SSE2;
#endif
#ifdef __SSE3__
  c1 |= bit_SSE3;
#endif
#ifdef __SSSE3__
  c1 |= bit_SSSE3;
#endif
#ifdef __SSE4_1__
  c1 |= bit_SSE4_1;
#endif
#ifdef __SSE4_2__
  c1 |= bit_SSE4_2;
#endif
#ifdef __AES__
  c1 |= bit_AES;
#endif
#ifdef __PCLMUL__
  c1 |= bit_PCLMUL;
#endif
#ifdef __AVX__
  c1 |= bit_AVX;
#endif
#ifdef __FMA__
  c1 |= bit_FMA;
#endif
#ifdef __SSE4A__
  c1e |= bit_SSE4a;
#endif
#ifdef __FMA4__
  c1e |= bit_FMA4;
#endif
#ifdef __XOP__
  c1e |= bit_XOP;
#endif
#ifdef __LWP__
  c1e |= bit_LWP;
#endif

  /* Only query a leaf when at least one bit from it is required.  */
  if (c1 | d1)
    {
      if (!__get_cpuid (1, &a, &b, &c, &d))
	goto fail;
      if ((c & c1) != c1 || (d & d1) != d1)
	goto fail;
    }
  if (c1e | d1e)
    {
      if (!__get_cpuid (0x80000001, &a, &b, &c, &d))
	goto fail;
      if ((c & c1e) != c1e || (d & d1e) != d1e)
	goto fail;
    }

  return;

 fail:
  /* Status 0 on purpose: a missing feature is not reported as a failure.  */
  exit (0);
}
/* This file auto-generated with ./vperm.pl 2 2. */
/* Expand TEST once for every selector pair (E0, E1) with E0 and E1 each
   ranging over 0..3; E0 varies fastest.  TEST is not defined here and
   must be supplied by the file that includes this one.  The invocations
   carry no trailing semicolon, so TEST has to expand to complete
   statements.  */
void check0(void)
{
TEST (0, 0)
TEST (1, 0)
TEST (2, 0)
TEST (3, 0)
TEST (0, 1)
TEST (1, 1)
TEST (2, 1)
TEST (3, 1)
TEST (0, 2)
TEST (1, 2)
TEST (2, 2)
TEST (3, 2)
TEST (0, 3)
TEST (1, 3)
TEST (2, 3)
TEST (3, 3)
}
/* Entry point for the including test: run every generated check
   function (only check0 for this index set).  */
void check(void)
{
check0 ();
}
/* This file auto-generated with ./vperm.pl 4 1. */
/* First half of the generated checks: expand TEST for every selector
   tuple (E0, E1, E2, E3) with E0, E1 and E2 each ranging over 0..3 and
   E3 over 0..1; E0 varies fastest.  TEST is not defined here and must be
   supplied by the including file; the invocations carry no trailing
   semicolon, so TEST has to expand to complete statements.  */
void check0(void)
{
TEST (0, 0, 0, 0)
TEST (1, 0, 0, 0)
TEST (2, 0, 0, 0)
TEST (3, 0, 0, 0)
TEST (0, 1, 0, 0)
TEST (1, 1, 0, 0)
TEST (2, 1, 0, 0)
TEST (3, 1, 0, 0)
TEST (0, 2, 0, 0)
TEST (1, 2, 0, 0)
TEST (2, 2, 0, 0)
TEST (3, 2, 0, 0)
TEST (0, 3, 0, 0)
TEST (1, 3, 0, 0)
TEST (2, 3, 0, 0)
TEST (3, 3, 0, 0)
TEST (0, 0, 1, 0)
TEST (1, 0, 1, 0)
TEST (2, 0, 1, 0)
TEST (3, 0, 1, 0)
TEST (0, 1, 1, 0)
TEST (1, 1, 1, 0)
TEST (2, 1, 1, 0)
TEST (3, 1, 1, 0)
TEST (0, 2, 1, 0)
TEST (1, 2, 1, 0)
TEST (2, 2, 1, 0)
TEST (3, 2, 1, 0)
TEST (0, 3, 1, 0)
TEST (1, 3, 1, 0)
TEST (2, 3, 1, 0)
TEST (3, 3, 1, 0)
TEST (0, 0, 2, 0)
TEST (1, 0, 2, 0)
TEST (2, 0, 2, 0)
TEST (3, 0, 2, 0)
TEST (0, 1, 2, 0)
TEST (1, 1, 2, 0)
TEST (2, 1, 2, 0)
TEST (3, 1, 2, 0)
TEST (0, 2, 2, 0)
TEST (1, 2, 2, 0)
TEST (2, 2, 2, 0)
TEST (3, 2, 2, 0)
TEST (0, 3, 2, 0)
TEST (1, 3, 2, 0)
TEST (2, 3, 2, 0)
TEST (3, 3, 2, 0)
TEST (0, 0, 3, 0)
TEST (1, 0, 3, 0)
TEST (2, 0, 3, 0)
TEST (3, 0, 3, 0)
TEST (0, 1, 3, 0)
TEST (1, 1, 3, 0)
TEST (2, 1, 3, 0)
TEST (3, 1, 3, 0)
TEST (0, 2, 3, 0)
TEST (1, 2, 3, 0)
TEST (2, 2, 3, 0)
TEST (3, 2, 3, 0)
TEST (0, 3, 3, 0)
TEST (1, 3, 3, 0)
TEST (2, 3, 3, 0)
TEST (3, 3, 3, 0)
TEST (0, 0, 0, 1)
TEST (1, 0, 0, 1)
TEST (2, 0, 0, 1)
TEST (3, 0, 0, 1)
TEST (0, 1, 0, 1)
TEST (1, 1, 0, 1)
TEST (2, 1, 0, 1)
TEST (3, 1, 0, 1)
TEST (0, 2, 0, 1)
TEST (1, 2, 0, 1)
TEST (2, 2, 0, 1)
TEST (3, 2, 0, 1)
TEST (0, 3, 0, 1)
TEST (1, 3, 0, 1)
TEST (2, 3, 0, 1)
TEST (3, 3, 0, 1)
TEST (0, 0, 1, 1)
TEST (1, 0, 1, 1)
TEST (2, 0, 1, 1)
TEST (3, 0, 1, 1)
TEST (0, 1, 1, 1)
TEST (1, 1, 1, 1)
TEST (2, 1, 1, 1)
TEST (3, 1, 1, 1)
TEST (0, 2, 1, 1)
TEST (1, 2, 1, 1)
TEST (2, 2, 1, 1)
TEST (3, 2, 1, 1)
TEST (0, 3, 1, 1)
TEST (1, 3, 1, 1)
TEST (2, 3, 1, 1)
TEST (3, 3, 1, 1)
TEST (0, 0, 2, 1)
TEST (1, 0, 2, 1)
TEST (2, 0, 2, 1)
TEST (3, 0, 2, 1)
TEST (0, 1, 2, 1)
TEST (1, 1, 2, 1)
TEST (2, 1, 2, 1)
TEST (3, 1, 2, 1)
TEST (0, 2, 2, 1)
TEST (1, 2, 2, 1)
TEST (2, 2, 2, 1)
TEST (3, 2, 2, 1)
TEST (0, 3, 2, 1)
TEST (1, 3, 2, 1)
TEST (2, 3, 2, 1)
TEST (3, 3, 2, 1)
TEST (0, 0, 3, 1)
TEST (1, 0, 3, 1)
TEST (2, 0, 3, 1)
TEST (3, 0, 3, 1)
TEST (0, 1, 3, 1)
TEST (1, 1, 3, 1)
TEST (2, 1, 3, 1)
TEST (3, 1, 3, 1)
TEST (0, 2, 3, 1)
TEST (1, 2, 3, 1)
TEST (2, 2, 3, 1)
TEST (3, 2, 3, 1)
TEST (0, 3, 3, 1)
TEST (1, 3, 3, 1)
TEST (2, 3, 3, 1)
TEST (3, 3, 3, 1)
}
/* Second half of the generated checks: expand TEST for every selector
   tuple (E0, E1, E2, E3) with E0, E1 and E2 each ranging over 0..3 and
   E3 over 2..3; E0 varies fastest.  TEST must be supplied by the
   including file.
   NOTE(review): the split into check0/check1 is presumably done by the
   generator to bound the size of each function -- confirm in vperm.pl.  */
void check1(void)
{
TEST (0, 0, 0, 2)
TEST (1, 0, 0, 2)
TEST (2, 0, 0, 2)
TEST (3, 0, 0, 2)
TEST (0, 1, 0, 2)
TEST (1, 1, 0, 2)
TEST (2, 1, 0, 2)
TEST (3, 1, 0, 2)
TEST (0, 2, 0, 2)
TEST (1, 2, 0, 2)
TEST (2, 2, 0, 2)
TEST (3, 2, 0, 2)
TEST (0, 3, 0, 2)
TEST (1, 3, 0, 2)
TEST (2, 3, 0, 2)
TEST (3, 3, 0, 2)
TEST (0, 0, 1, 2)
TEST (1, 0, 1, 2)
TEST (2, 0, 1, 2)
TEST (3, 0, 1, 2)
TEST (0, 1, 1, 2)
TEST (1, 1, 1, 2)
TEST (2, 1, 1, 2)
TEST (3, 1, 1, 2)
TEST (0, 2, 1, 2)
TEST (1, 2, 1, 2)
TEST (2, 2, 1, 2)
TEST (3, 2, 1, 2)
TEST (0, 3, 1, 2)
TEST (1, 3, 1, 2)
TEST (2, 3, 1, 2)
TEST (3, 3, 1, 2)
TEST (0, 0, 2, 2)
TEST (1, 0, 2, 2)
TEST (2, 0, 2, 2)
TEST (3, 0, 2, 2)
TEST (0, 1, 2, 2)
TEST (1, 1, 2, 2)
TEST (2, 1, 2, 2)
TEST (3, 1, 2, 2)
TEST (0, 2, 2, 2)
TEST (1, 2, 2, 2)
TEST (2, 2, 2, 2)
TEST (3, 2, 2, 2)
TEST (0, 3, 2, 2)
TEST (1, 3, 2, 2)
TEST (2, 3, 2, 2)
TEST (3, 3, 2, 2)
TEST (0, 0, 3, 2)
TEST (1, 0, 3, 2)
TEST (2, 0, 3, 2)
TEST (3, 0, 3, 2)
TEST (0, 1, 3, 2)
TEST (1, 1, 3, 2)
TEST (2, 1, 3, 2)
TEST (3, 1, 3, 2)
TEST (0, 2, 3, 2)
TEST (1, 2, 3, 2)
TEST (2, 2, 3, 2)
TEST (3, 2, 3, 2)
TEST (0, 3, 3, 2)
TEST (1, 3, 3, 2)
TEST (2, 3, 3, 2)
TEST (3, 3, 3, 2)
TEST (0, 0, 0, 3)
TEST (1, 0, 0, 3)
TEST (2, 0, 0, 3)
TEST (3, 0, 0, 3)
TEST (0, 1, 0, 3)
TEST (1, 1, 0, 3)
TEST (2, 1, 0, 3)
TEST (3, 1, 0, 3)
TEST (0, 2, 0, 3)
TEST (1, 2, 0, 3)
TEST (2, 2, 0, 3)
TEST (3, 2, 0, 3)
TEST (0, 3, 0, 3)
TEST (1, 3, 0, 3)
TEST (2, 3, 0, 3)
TEST (3, 3, 0, 3)
TEST (0, 0, 1, 3)
TEST (1, 0, 1, 3)
TEST (2, 0, 1, 3)
TEST (3, 0, 1, 3)
TEST (0, 1, 1, 3)
TEST (1, 1, 1, 3)
TEST (2, 1, 1, 3)
TEST (3, 1, 1, 3)
TEST (0, 2, 1, 3)
TEST (1, 2, 1, 3)
TEST (2, 2, 1, 3)
TEST (3, 2, 1, 3)
TEST (0, 3, 1, 3)
TEST (1, 3, 1, 3)
TEST (2, 3, 1, 3)
TEST (3, 3, 1, 3)
TEST (0, 0, 2, 3)
TEST (1, 0, 2, 3)
TEST (2, 0, 2, 3)
TEST (3, 0, 2, 3)
TEST (0, 1, 2, 3)
TEST (1, 1, 2, 3)
TEST (2, 1, 2, 3)
TEST (3, 1, 2, 3)
TEST (0, 2, 2, 3)
TEST (1, 2, 2, 3)
TEST (2, 2, 2, 3)
TEST (3, 2, 2, 3)
TEST (0, 3, 2, 3)
TEST (1, 3, 2, 3)
TEST (2, 3, 2, 3)
TEST (3, 3, 2, 3)
TEST (0, 0, 3, 3)
TEST (1, 0, 3, 3)
TEST (2, 0, 3, 3)
TEST (3, 0, 3, 3)
TEST (0, 1, 3, 3)
TEST (1, 1, 3, 3)
TEST (2, 1, 3, 3)
TEST (3, 1, 3, 3)
TEST (0, 2, 3, 3)
TEST (1, 2, 3, 3)
TEST (2, 2, 3, 3)
TEST (3, 2, 3, 3)
TEST (0, 3, 3, 3)
TEST (1, 3, 3, 3)
TEST (2, 3, 3, 3)
TEST (3, 3, 3, 3)
}
/* Entry point for the including test: run both generated halves,
   check0 first and then check1.  */
void check(void)
{
check0 ();
check1 ();
}
/* { dg-do run } */
/* { dg-options "-O -msse2" } */
#include "isa-check.h"
/* S is the element type, V the 16-byte data vector, IV the 16-byte
   integer vector used as the permutation selector, and U a union giving
   element-wise access to a data vector.  */
typedef double S;
typedef double V __attribute__((vector_size(16)));
typedef long long IV __attribute__((vector_size(16)));
typedef union { S s[2]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   builtin's result and c the hand-built expected result.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);

/* Minimal assert: trap when T is false.  */
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Permute i[0] and i[1] with the constant selector {E0, E1}, build the
   expected vector element by element and trap when the two differ.
   Selector value N is read as element N % 2 of i[N / 2], so every index
   stays inside its own two-element s[] array for N in 0..3.
   NOTE(review): the empty asm with a "memory" clobber presumably keeps
   the compiler from evaluating the comparison at compile time.  */
#define TEST(E0, E1) \
  b.v = __builtin_ia32_vec_perm_v2df (i[0].v, i[1].v, (IV){E0, E1}); \
  c.s[0] = i[(E0) / 2].s[(E0) % 2]; \
  c.s[1] = i[(E1) / 2].s[(E1) % 2]; \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);
#include "vperm-2-2.inc"
/* Store 0..3 into the two input vectors (i[0] = {0, 1}, i[1] = {2, 3}),
   writing each value through the union that actually holds it instead of
   indexing past the end of i[0].s, then run the generated checks.  */
int main()
{
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[1].s[0] = 2;
  i[1].s[1] = 3;

  check();
  return 0;
}
/* { dg-do run } */
/* { dg-options "-O -msse2" } */
#include "isa-check.h"
/* S is the element type, V the 16-byte data vector, IV the 16-byte
   integer vector used as the permutation selector, and U a union giving
   element-wise access to a data vector.  */
typedef long long S;
typedef long long V __attribute__((vector_size(16)));
typedef long long IV __attribute__((vector_size(16)));
typedef union { S s[2]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   builtin's result and c the hand-built expected result.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);

/* Minimal assert: trap when T is false.  */
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Permute i[0] and i[1] with the constant selector {E0, E1}, build the
   expected vector element by element and trap when the two differ.
   Selector value N is read as element N % 2 of i[N / 2], so every index
   stays inside its own two-element s[] array for N in 0..3.
   NOTE(review): the empty asm with a "memory" clobber presumably keeps
   the compiler from evaluating the comparison at compile time.  */
#define TEST(E0, E1) \
  b.v = __builtin_ia32_vec_perm_v2di (i[0].v, i[1].v, (IV){E0, E1}); \
  c.s[0] = i[(E0) / 2].s[(E0) % 2]; \
  c.s[1] = i[(E1) / 2].s[(E1) % 2]; \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);
#include "vperm-2-2.inc"
/* Store 0..3 into the two input vectors (i[0] = {0, 1}, i[1] = {2, 3}),
   writing each value through the union that actually holds it instead of
   indexing past the end of i[0].s, then run the generated checks.  */
int main()
{
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[1].s[0] = 2;
  i[1].s[1] = 3;

  check();
  return 0;
}
/* { dg-do run } */
/* { dg-options "-O -msse" } */
#include "isa-check.h"
/* S is the element type, V the 16-byte data vector, IV the 16-byte
   integer vector used as the permutation selector, and U a union giving
   element-wise access to a data vector.  */
typedef float S;
typedef float V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   builtin's result and c the hand-built expected result.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);

/* Minimal assert: trap when T is false.  */
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Permute i[0] and i[1] with the constant selector {E0, E1, E2, E3},
   build the expected vector element by element and trap when the two
   differ.  Selector value N is read as element N % 4 of i[N / 4], which
   is the same element as before for N in 0..3 and stays inside the
   four-element s[] arrays for N in 4..7 as well.
   NOTE(review): the empty asm with a "memory" clobber presumably keeps
   the compiler from evaluating the comparison at compile time.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4sf (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = i[(E0) / 4].s[(E0) % 4]; \
  c.s[1] = i[(E1) / 4].s[(E1) % 4]; \
  c.s[2] = i[(E2) / 4].s[(E2) % 4]; \
  c.s[3] = i[(E3) / 4].s[(E3) % 4]; \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);
#include "vperm-4-1.inc"
int main()
{
i[0].s[0] = 0;
i[0].s[1] = 1;
i[0].s[2] = 2;
i[0].s[3] = 3;
i[0].s[4] = 4;
i[0].s[5] = 5;
i[0].s[6] = 6;
i[0].s[7] = 7;
check();
return 0;
}
/* { dg-do run } */
/* { dg-options "-O -mssse3" } */

/* Run-time check of __builtin_ia32_vec_perm_v4sf: each constant selector
   listed in vperm-4-2.inc is applied to the two input vectors and the
   result is compared with a reference built one element at a time.  */

#include "isa-check.h"

typedef float S;
typedef float V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two operands of the permutation, b receives the
   builtin's result and c the reference result.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Selector value E picks element E % 4 of operand E / 4.  The operand is
   indexed explicitly so that no subscript runs past the end of the
   four-element array s (the values read are the same ones that
   i[0].s[E] reached through the adjacent i[1]).  The empty asm with a
   "memory" clobber is kept from the original; presumably it makes the
   comparison use the values actually stored in b and c.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4sf (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = i[(E0) / 4].s[(E0) % 4]; \
  c.s[1] = i[(E1) / 4].s[(E1) % 4]; \
  c.s[2] = i[(E2) / 4].s[(E2) % 4]; \
  c.s[3] = i[(E3) / 4].s[(E3) % 4]; \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-4-2.inc"

int main()
{
  /* Give every element of both operands a distinct value equal to its
     selector number.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[0].s[2] = 2;
  i[0].s[3] = 3;
  /* Elements 4..7 live in the second operand; address them through
     i[1] instead of overrunning i[0].s.  */
  i[1].s[0] = 4;
  i[1].s[1] = 5;
  i[1].s[2] = 6;
  i[1].s[3] = 7;

  check();
  return 0;
}
/* { dg-do run } */
/* { dg-options "-O -msse2" } */

/* Run-time check of __builtin_ia32_vec_perm_v4si: each constant selector
   listed in vperm-4-1.inc is applied to the input vectors and the result
   is compared with a reference built one element at a time.  */

#include "isa-check.h"

typedef int S;
typedef int V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two operands of the permutation, b receives the
   builtin's result and c the reference result.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Selector value E picks element E % 4 of operand E / 4.  The operand is
   indexed explicitly so that no subscript can run past the end of the
   four-element array s; for selectors below 4 this is exactly
   i[0].s[E].  The empty asm with a "memory" clobber is kept from the
   original; presumably it makes the comparison use the values actually
   stored in b and c.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4si (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = i[(E0) / 4].s[(E0) % 4]; \
  c.s[1] = i[(E1) / 4].s[(E1) % 4]; \
  c.s[2] = i[(E2) / 4].s[(E2) % 4]; \
  c.s[3] = i[(E3) / 4].s[(E3) % 4]; \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-4-1.inc"

int main()
{
  /* Give every element of both operands a distinct value equal to its
     selector number.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[0].s[2] = 2;
  i[0].s[3] = 3;
  /* Elements 4..7 live in the second operand; address them through
     i[1] instead of overrunning i[0].s.  */
  i[1].s[0] = 4;
  i[1].s[1] = 5;
  i[1].s[2] = 6;
  i[1].s[3] = 7;

  check();
  return 0;
}
/* { dg-do run } */
/* { dg-options "-O -mssse3" } */

/* Run-time check of __builtin_ia32_vec_perm_v4si: each constant selector
   listed in vperm-4-2.inc is applied to the two input vectors and the
   result is compared with a reference built one element at a time.  */

#include "isa-check.h"

typedef int S;
typedef int V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two operands of the permutation, b receives the
   builtin's result and c the reference result.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Selector value E picks element E % 4 of operand E / 4.  The operand is
   indexed explicitly so that no subscript runs past the end of the
   four-element array s (the values read are the same ones that
   i[0].s[E] reached through the adjacent i[1]).  The empty asm with a
   "memory" clobber is kept from the original; presumably it makes the
   comparison use the values actually stored in b and c.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4si (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = i[(E0) / 4].s[(E0) % 4]; \
  c.s[1] = i[(E1) / 4].s[(E1) % 4]; \
  c.s[2] = i[(E2) / 4].s[(E2) % 4]; \
  c.s[3] = i[(E3) / 4].s[(E3) % 4]; \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-4-2.inc"

int main()
{
  /* Give every element of both operands a distinct value equal to its
     selector number.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[0].s[2] = 2;
  i[0].s[3] = 3;
  /* Elements 4..7 live in the second operand; address them through
     i[1] instead of overrunning i[0].s.  */
  i[1].s[0] = 4;
  i[1].s[1] = 5;
  i[1].s[2] = 6;
  i[1].s[3] = 7;

  check();
  return 0;
}
/* { dg-do run } */
/* { dg-options "-O -mxop" } */
/* The test body is that of vperm-v4si-2.c, included verbatim below; this
   file supplies its own options line, which requests -mxop.  */
#include "vperm-v4si-2.c"
#!/usr/bin/perl
# Generate the body of a vperm-NELT-LENG.inc test file on stdout.
#
# Usage: ./vperm.pl NELT LENG
#   NELT  number of selector values in each TEST invocation
#   LENG  multiplier for the selector range: every selector value runs
#         over 0 .. LENG*NELT - 1
#
# One TEST (...) line is printed for each of the (LENG*NELT)**NELT
# selector combinations.  The lines are split into functions check0,
# check1, ... holding at most 128 tests each, followed by a check()
# function that calls all of them in order.
use strict;
use warnings;

# Both arguments must be positive integers; anything else used to be
# silently read as 0 and produced a meaningless file.
die "usage: $0 NELT LENG\n"
    unless @ARGV == 2
        && $ARGV[0] =~ /^[1-9][0-9]*$/
        && $ARGV[1] =~ /^[1-9][0-9]*$/;

my $nelt = int($ARGV[0]);
my $leng = int($ARGV[1]);

print "/* This file auto-generated with ./vperm.pl $nelt $leng. */\n\n";

my $range = $leng * $nelt;    # exclusive upper bound of a selector value
my $total = $range ** $nelt;  # number of TEST lines, computed once
my @perm = (0) x $nelt;       # current selector combination
my $ncheck = 0;               # number of checkN functions opened so far

for (my $i = 0; $i < $total; ++$i) {
    # Open a new checkN function every 128 tests, closing the previous one.
    if ($i % 128 == 0) {
        print "}\n\n" if $ncheck > 0;
        print "void check$ncheck(void)\n{\n";
        ++$ncheck;
    }

    print " TEST (", join(", ", @perm), ")\n";

    # Advance @perm like an odometer with the lowest position first:
    # bump a position, and carry into the next one when it wraps to 0.
    for (my $j = 0; $j < $nelt; ++$j) {
        last if ++$perm[$j] < $range;
        $perm[$j] = 0;
    }
}
print "}\n\n";

# Driver that runs every generated group.
print "void check(void)\n{\n";
print " check$_ ();\n" for 0 .. $ncheck - 1;
print "}\n\n";
...@@ -2562,7 +2562,9 @@ proc check_effective_target_vect_extract_even_odd { } { ...@@ -2562,7 +2562,9 @@ proc check_effective_target_vect_extract_even_odd { } {
verbose "check_effective_target_vect_extract_even_odd: using cached result" 2 verbose "check_effective_target_vect_extract_even_odd: using cached result" 2
} else { } else {
set et_vect_extract_even_odd_saved 0 set et_vect_extract_even_odd_saved 0
if { [istarget powerpc*-*-*] if { [istarget powerpc*-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*]
|| [istarget spu-*-*] } { || [istarget spu-*-*] } {
set et_vect_extract_even_odd_saved 1 set et_vect_extract_even_odd_saved 1
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment