Commit 703bbcdf by Jiong Wang Committed by Jiong Wang

[AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics

gcc/
	* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
	reduc_smin_scal_): Use VDQIF_F16.
	(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
	* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
	Use VHSDF.
	(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
	* config/aarch64/iterators.md (VDQIF_F16): New.
	(vp): Support HF modes.
	* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
	vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.

From-SVN: r238721
parent ab2e8f01
2016-07-25 Jiong Wang <jiong.wang@arm.com> 2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
reduc_smin_scal_): Use VDQIF_F16.
(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
Use VHSDF.
(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
* config/aarch64/iterators.md (VDQIF_F16): New.
(vp): Support HF modes.
* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to * config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>". "*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type. (*aarch64_mul3_elt<mode>): Update schedule type.
......
...@@ -234,12 +234,12 @@ ...@@ -234,12 +234,12 @@
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
/* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */ /* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */
BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10)
BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10) BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10) BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
/* Implemented by <maxmin><mode>3. /* Implemented by <maxmin><mode>3.
smax variants map to fmaxnm, smax variants map to fmaxnm,
......
...@@ -2130,8 +2130,8 @@ ...@@ -2130,8 +2130,8 @@
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
(define_expand "reduc_<maxmin_uns>_scal_<mode>" (define_expand "reduc_<maxmin_uns>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand") [(match_operand:<VEL> 0 "register_operand")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
FMAXMINV)] FMAXMINV)]
"TARGET_SIMD" "TARGET_SIMD"
{ {
rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
...@@ -2178,12 +2178,12 @@ ...@@ -2178,12 +2178,12 @@
) )
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FMAXMINV))] FMAXMINV))]
"TARGET_SIMD" "TARGET_SIMD"
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
) )
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
......
...@@ -26927,6 +26927,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b) ...@@ -26927,6 +26927,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b)
return vmulxq_f16 (__a, vdupq_n_f16 (__b)); return vmulxq_f16 (__a, vdupq_n_f16 (__b));
} }
/* ARMv8.2-A FP16 reduction vector intrinsics. */
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxnmv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smax_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxnmvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smax_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminnmv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smin_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminnmvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smin_scal_v8hf (__a);
}
#pragma GCC pop_options #pragma GCC pop_options
#undef __aarch64_vget_lane_any #undef __aarch64_vget_lane_any
......
...@@ -159,6 +159,8 @@ ...@@ -159,6 +159,8 @@
;; Vector modes except double int. ;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF V2DF])
;; Vector modes for S type. ;; Vector modes for S type.
(define_mode_iterator VDQ_SI [V2SI V4SI]) (define_mode_iterator VDQ_SI [V2SI V4SI])
...@@ -760,8 +762,9 @@ ...@@ -760,8 +762,9 @@
(define_mode_attr vp [(V8QI "v") (V16QI "v") (define_mode_attr vp [(V8QI "v") (V16QI "v")
(V4HI "v") (V8HI "v") (V4HI "v") (V8HI "v")
(V2SI "p") (V4SI "v") (V2SI "p") (V4SI "v")
(V2DI "p") (V2DF "p") (V2DI "p") (V2DF "p")
(V2SF "p") (V4SF "v")]) (V2SF "p") (V4SF "v")
(V4HF "v") (V8HF "v")])
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment