Commit 703bbcdf by Jiong Wang Committed by Jiong Wang

[AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics

gcc/
	* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
	reduc_smin_scal_): Use VDQIF_F16.
	(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
	* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
	Use VHSDF.
	(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
	* config/aarch64/iterators.md (VDQIF_F16): New.
	(vp): Support HF modes.
	* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
	vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.

From-SVN: r238721
parent ab2e8f01
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
reduc_smin_scal_): Use VDQIF_F16.
(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
Use VHSDF.
(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
* config/aarch64/iterators.md (VDQIF_F16): New.
(vp): Support HF modes.
* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
......
......@@ -234,12 +234,12 @@
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
/* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */
BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10)
BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
/* Implemented by <maxmin><mode>3.
smax variants map to fmaxnm,
......
......@@ -2130,8 +2130,8 @@
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
FMAXMINV)]
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
FMAXMINV)]
"TARGET_SIMD"
{
rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
......@@ -2178,12 +2178,12 @@
)
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
FMAXMINV))]
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FMAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
[(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
......
......@@ -26927,6 +26927,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b)
return vmulxq_f16 (__a, vdupq_n_f16 (__b));
}
/* ARMv8.2-A FP16 reduction vector intrinsics. */
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxnmv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smax_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxnmvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smax_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminnmv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smin_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminnmvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smin_scal_v8hf (__a);
}
#pragma GCC pop_options
#undef __aarch64_vget_lane_any
......
......@@ -159,6 +159,8 @@
;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF V2DF])
;; Vector modes for S type.
(define_mode_iterator VDQ_SI [V2SI V4SI])
......@@ -760,8 +762,9 @@
(define_mode_attr vp [(V8QI "v") (V16QI "v")
(V4HI "v") (V8HI "v")
(V2SI "p") (V4SI "v")
(V2DI "p") (V2DF "p")
(V2SF "p") (V4SF "v")])
(V2DI "p") (V2DF "p")
(V2SF "p") (V4SF "v")
(V4HF "v") (V8HF "v")])
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment