Commit 7a10ea9f, authored and committed by Kyrylo Tkachov

[ARM] Vectorise bswap* in aarch32.

	* config/arm/neon.md (neon_bswap<mode>): New pattern.
	* config/arm/arm.c (neon_itype): Add NEON_BSWAP.
	(arm_init_neon_builtins): Handle NEON_BSWAP.
	Define required type nodes.
	(arm_expand_neon_builtin): Handle NEON_BSWAP.
	(arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
	* config/arm/arm_neon_builtins.def (bswap): Define builtins.
	* config/arm/iterators.md (VDQHSD): New mode iterator.

	* lib/target-supports.exp (check_effective_target_vect_bswap):
	Specify arm*-*-* support.

From-SVN: r210967
parent 597f5997
2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/neon.md (neon_bswap<mode>): New pattern.
* config/arm/arm.c (neon_itype): Add NEON_BSWAP.
(arm_init_neon_builtins): Handle NEON_BSWAP.
Define required type nodes.
(arm_expand_neon_builtin): Handle NEON_BSWAP.
(arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
* config/arm/arm_neon_builtins.def (bswap): Define builtins.
* config/arm/iterators.md (VDQHSD): New mode iterator.
2014-05-27 Richard Biener <rguenther@suse.de> 2014-05-27 Richard Biener <rguenther@suse.de>
* tree-vrp.c (vrp_evaluate_conditional_warnv_with_ops_using_ranges): * tree-vrp.c (vrp_evaluate_conditional_warnv_with_ops_using_ranges):
......
...@@ -23065,6 +23065,7 @@ typedef enum { ...@@ -23065,6 +23065,7 @@ typedef enum {
NEON_BINOP, NEON_BINOP,
NEON_TERNOP, NEON_TERNOP,
NEON_UNOP, NEON_UNOP,
NEON_BSWAP,
NEON_GETLANE, NEON_GETLANE,
NEON_SETLANE, NEON_SETLANE,
NEON_CREATE, NEON_CREATE,
...@@ -23531,14 +23532,19 @@ arm_init_neon_builtins (void) ...@@ -23531,14 +23532,19 @@ arm_init_neon_builtins (void)
tree V8QI_type_node; tree V8QI_type_node;
tree V4HI_type_node; tree V4HI_type_node;
tree V4UHI_type_node;
tree V4HF_type_node; tree V4HF_type_node;
tree V2SI_type_node; tree V2SI_type_node;
tree V2USI_type_node;
tree V2SF_type_node; tree V2SF_type_node;
tree V16QI_type_node; tree V16QI_type_node;
tree V8HI_type_node; tree V8HI_type_node;
tree V8UHI_type_node;
tree V4SI_type_node; tree V4SI_type_node;
tree V4USI_type_node;
tree V4SF_type_node; tree V4SF_type_node;
tree V2DI_type_node; tree V2DI_type_node;
tree V2UDI_type_node;
tree intUQI_type_node; tree intUQI_type_node;
tree intUHI_type_node; tree intUHI_type_node;
...@@ -23613,16 +23619,26 @@ arm_init_neon_builtins (void) ...@@ -23613,16 +23619,26 @@ arm_init_neon_builtins (void)
const_intDI_pointer_node = build_pointer_type (const_intDI_node); const_intDI_pointer_node = build_pointer_type (const_intDI_node);
const_float_pointer_node = build_pointer_type (const_float_node); const_float_pointer_node = build_pointer_type (const_float_node);
/* Unsigned integer types for various mode sizes. */
intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
/* Now create vector types based on our NEON element types. */ /* Now create vector types based on our NEON element types. */
/* 64-bit vectors. */ /* 64-bit vectors. */
V8QI_type_node = V8QI_type_node =
build_vector_type_for_mode (neon_intQI_type_node, V8QImode); build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
V4HI_type_node = V4HI_type_node =
build_vector_type_for_mode (neon_intHI_type_node, V4HImode); build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
V4UHI_type_node =
build_vector_type_for_mode (intUHI_type_node, V4HImode);
V4HF_type_node = V4HF_type_node =
build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
V2SI_type_node = V2SI_type_node =
build_vector_type_for_mode (neon_intSI_type_node, V2SImode); build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
V2USI_type_node =
build_vector_type_for_mode (intUSI_type_node, V2SImode);
V2SF_type_node = V2SF_type_node =
build_vector_type_for_mode (neon_float_type_node, V2SFmode); build_vector_type_for_mode (neon_float_type_node, V2SFmode);
/* 128-bit vectors. */ /* 128-bit vectors. */
...@@ -23630,19 +23646,18 @@ arm_init_neon_builtins (void) ...@@ -23630,19 +23646,18 @@ arm_init_neon_builtins (void)
build_vector_type_for_mode (neon_intQI_type_node, V16QImode); build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
V8HI_type_node = V8HI_type_node =
build_vector_type_for_mode (neon_intHI_type_node, V8HImode); build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
V8UHI_type_node =
build_vector_type_for_mode (intUHI_type_node, V8HImode);
V4SI_type_node = V4SI_type_node =
build_vector_type_for_mode (neon_intSI_type_node, V4SImode); build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
V4USI_type_node =
build_vector_type_for_mode (intUSI_type_node, V4SImode);
V4SF_type_node = V4SF_type_node =
build_vector_type_for_mode (neon_float_type_node, V4SFmode); build_vector_type_for_mode (neon_float_type_node, V4SFmode);
V2DI_type_node = V2DI_type_node =
build_vector_type_for_mode (neon_intDI_type_node, V2DImode); build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
V2UDI_type_node =
/* Unsigned integer types for various mode sizes. */ build_vector_type_for_mode (intUDI_type_node, V2DImode);
intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
(*lang_hooks.types.register_builtin_type) (intUQI_type_node, (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
...@@ -23677,8 +23692,6 @@ arm_init_neon_builtins (void) ...@@ -23677,8 +23692,6 @@ arm_init_neon_builtins (void)
if (TARGET_CRYPTO && TARGET_HARD_FLOAT) if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
{ {
tree V4USI_type_node =
build_vector_type_for_mode (intUSI_type_node, V4SImode);
tree V16UQI_type_node = tree V16UQI_type_node =
build_vector_type_for_mode (intUQI_type_node, V16QImode); build_vector_type_for_mode (intUQI_type_node, V16QImode);
...@@ -24023,6 +24036,31 @@ arm_init_neon_builtins (void) ...@@ -24023,6 +24036,31 @@ arm_init_neon_builtins (void)
ftype = build_function_type_list (return_type, eltype, NULL); ftype = build_function_type_list (return_type, eltype, NULL);
break; break;
} }
case NEON_BSWAP:
{
tree eltype = NULL_TREE;
switch (insn_data[d->code].operand[1].mode)
{
case V4HImode:
eltype = V4UHI_type_node;
break;
case V8HImode:
eltype = V8UHI_type_node;
break;
case V2SImode:
eltype = V2USI_type_node;
break;
case V4SImode:
eltype = V4USI_type_node;
break;
case V2DImode:
eltype = V2UDI_type_node;
break;
default: gcc_unreachable ();
}
ftype = build_function_type_list (eltype, eltype, NULL);
break;
}
default: default:
gcc_unreachable (); gcc_unreachable ();
} }
...@@ -25197,6 +25235,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) ...@@ -25197,6 +25235,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
case NEON_SPLIT: case NEON_SPLIT:
case NEON_FLOAT_WIDEN: case NEON_FLOAT_WIDEN:
case NEON_FLOAT_NARROW: case NEON_FLOAT_NARROW:
case NEON_BSWAP:
case NEON_REINTERP: case NEON_REINTERP:
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
...@@ -29677,8 +29716,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) ...@@ -29677,8 +29716,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
int in_n, out_n; int in_n, out_n;
if (TREE_CODE (type_out) != VECTOR_TYPE if (TREE_CODE (type_out) != VECTOR_TYPE
|| TREE_CODE (type_in) != VECTOR_TYPE || TREE_CODE (type_in) != VECTOR_TYPE)
|| !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
return NULL_TREE; return NULL_TREE;
out_mode = TYPE_MODE (TREE_TYPE (type_out)); out_mode = TYPE_MODE (TREE_TYPE (type_out));
...@@ -29690,7 +29728,13 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) ...@@ -29690,7 +29728,13 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
decl of the vectorized builtin for the appropriate vector mode. decl of the vectorized builtin for the appropriate vector mode.
NULL_TREE is returned if no such builtin is available. */ NULL_TREE is returned if no such builtin is available. */
#undef ARM_CHECK_BUILTIN_MODE #undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C) \ #define ARM_CHECK_BUILTIN_MODE(C) \
(TARGET_NEON && TARGET_FPU_ARMV8 \
&& flag_unsafe_math_optimizations \
&& ARM_CHECK_BUILTIN_MODE_1 (C))
#undef ARM_CHECK_BUILTIN_MODE_1
#define ARM_CHECK_BUILTIN_MODE_1(C) \
(out_mode == SFmode && out_n == C \ (out_mode == SFmode && out_n == C \
&& in_mode == SFmode && in_n == C) && in_mode == SFmode && in_n == C)
...@@ -29715,6 +29759,30 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) ...@@ -29715,6 +29759,30 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return ARM_FIND_VRINT_VARIANT (vrintz); return ARM_FIND_VRINT_VARIANT (vrintz);
case BUILT_IN_ROUNDF: case BUILT_IN_ROUNDF:
return ARM_FIND_VRINT_VARIANT (vrinta); return ARM_FIND_VRINT_VARIANT (vrinta);
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##Imode && out_n == C \
&& in_mode == N##Imode && in_n == C)
case BUILT_IN_BSWAP16:
if (ARM_CHECK_BUILTIN_MODE (4, H))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
else if (ARM_CHECK_BUILTIN_MODE (8, H))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
else
return NULL_TREE;
case BUILT_IN_BSWAP32:
if (ARM_CHECK_BUILTIN_MODE (2, S))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
else if (ARM_CHECK_BUILTIN_MODE (4, S))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
else
return NULL_TREE;
case BUILT_IN_BSWAP64:
if (ARM_CHECK_BUILTIN_MODE (2, D))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
else
return NULL_TREE;
default: default:
return NULL_TREE; return NULL_TREE;
} }
......
...@@ -88,6 +88,7 @@ VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), ...@@ -88,6 +88,7 @@ VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di),
VAR2 (UNOP, vcnt, v8qi, v16qi), VAR2 (UNOP, vcnt, v8qi, v16qi),
VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
......
...@@ -116,6 +116,9 @@ ...@@ -116,6 +116,9 @@
;; Vector modes including 64-bit integer elements, but no floats. ;; Vector modes including 64-bit integer elements, but no floats.
(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
;; Vector modes for H, S and D types.
(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
;; Vector modes for float->int conversions. ;; Vector modes for float->int conversions.
(define_mode_iterator VCVTF [V2SF V4SF]) (define_mode_iterator VCVTF [V2SF V4SF])
......
...@@ -2547,6 +2547,14 @@ ...@@ -2547,6 +2547,14 @@
[(set_attr "type" "neon_qabs<q>")] [(set_attr "type" "neon_qabs<q>")]
) )
;; Byte-swap pattern for the VDQHSD vector modes (V4HI, V8HI, V2SI, V4SI,
;; V2DI).  Operand 1 is the source register and operand 0 receives the
;; bswap result; both use the "w" register constraint.  The pattern is
;; only enabled when TARGET_NEON holds.
;; The output template emits vrev<V_sz_elem>.8; <V_sz_elem> is presumably
;; the element width in bits (the attribute is defined outside this hunk
;; -- confirm in iterators.md).
(define_insn "neon_bswap<mode>"
[(set (match_operand:VDQHSD 0 "register_operand" "=w")
(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
"TARGET_NEON"
"vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
[(set_attr "type" "neon_rev<q>")]
)
(define_expand "neon_vneg<mode>" (define_expand "neon_vneg<mode>"
[(match_operand:VDQW 0 "s_register_operand" "") [(match_operand:VDQW 0 "s_register_operand" "")
(match_operand:VDQW 1 "s_register_operand" "") (match_operand:VDQW 1 "s_register_operand" "")
......
2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* lib/target-supports.exp (check_effective_target_vect_bswap):
Specify arm*-*-* support.
2014-05-27 Dominique d'Humieres <dominiq@lps.ens.fr> 2014-05-27 Dominique d'Humieres <dominiq@lps.ens.fr>
PR testsuite/61319 PR testsuite/61319
......
...@@ -3306,7 +3306,10 @@ proc check_effective_target_vect_bswap { } { ...@@ -3306,7 +3306,10 @@ proc check_effective_target_vect_bswap { } {
verbose "check_effective_target_vect_bswap: using cached result" 2 verbose "check_effective_target_vect_bswap: using cached result" 2
} else { } else {
set et_vect_bswap_saved 0 set et_vect_bswap_saved 0
if { [istarget aarch64*-*-*] } { if { [istarget aarch64*-*-*]
|| ([istarget arm*-*-*]
&& [check_effective_target_arm_neon])
} {
set et_vect_bswap_saved 1 set et_vect_bswap_saved 1
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment