Commit 8beb9a0d by Jiong Wang Committed by Jiong Wang

[AArch64] Use fmin/fmax for v[min|max]nm{q} intrinsics

  smin/smax will actually honor quiet NaN.

gcc/
	* config/aarch64/aarch64-simd-builtins.def (smax): Remove float
	variants.
	(smin): Likewise.
	(fmax): New entry.
	(fmin): Likewise.
	* config/aarch64/arm_neon.h (vmaxnm_f32): Use
	__builtin_aarch64_fmaxv2sf.
	(vmaxnmq_f32): Likewise.
	(vmaxnmq_f64): Likewise.
	(vminnm_f32): Likewise.
	(vminnmq_f32): Likewise.
	(vminnmq_f64): Likewise.

gcc/testsuite/

	* gcc.target/aarch64/simd/vminmaxnm_1.c: New.

From-SVN: r238166
parent cef4b650
2016-07-08 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd-builtins.def (smax): Remove float
variants.
(smin): Likewise.
(fmax): New entry.
(fmin): Likewise.
* config/aarch64/arm_neon.h (vmaxnm_f32): Use
__builtin_aarch64_fmaxv2sf.
(vmaxnmq_f32): Likewise.
(vmaxnmq_f64): Likewise.
(vminnm_f32): Likewise.
(vminnmq_f32): Likewise.
(vminnmq_f64): Likewise.
2016-07-08 Michael Meissner <meissner@linux.vnet.ibm.com> 2016-07-08 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/71806 PR target/71806
......
...@@ -244,13 +244,17 @@ ...@@ -244,13 +244,17 @@
/* Implemented by <maxmin><mode>3. /* Implemented by <maxmin><mode>3.
smax variants map to fmaxnm, smax variants map to fmaxnm,
smax_nan variants map to fmax. */ smax_nan variants map to fmax. */
BUILTIN_VDQIF (BINOP, smax, 3) BUILTIN_VDQ_BHSI (BINOP, smax, 3)
BUILTIN_VDQIF (BINOP, smin, 3) BUILTIN_VDQ_BHSI (BINOP, smin, 3)
BUILTIN_VDQ_BHSI (BINOP, umax, 3) BUILTIN_VDQ_BHSI (BINOP, umax, 3)
BUILTIN_VDQ_BHSI (BINOP, umin, 3) BUILTIN_VDQ_BHSI (BINOP, umin, 3)
BUILTIN_VDQF (BINOP, smax_nan, 3) BUILTIN_VDQF (BINOP, smax_nan, 3)
BUILTIN_VDQF (BINOP, smin_nan, 3) BUILTIN_VDQF (BINOP, smin_nan, 3)
/* Implemented by <fmaxmin><mode>3. */
BUILTIN_VDQF (BINOP, fmax, 3)
BUILTIN_VDQF (BINOP, fmin, 3)
/* Implemented by aarch64_<maxmin_uns>p<mode>. */ /* Implemented by aarch64_<maxmin_uns>p<mode>. */
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0) BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
BUILTIN_VDQ_BHSI (BINOP, sminp, 0) BUILTIN_VDQ_BHSI (BINOP, sminp, 0)
......
...@@ -17588,19 +17588,19 @@ vpminnms_f32 (float32x2_t a) ...@@ -17588,19 +17588,19 @@ vpminnms_f32 (float32x2_t a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t __a, float32x2_t __b) vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
{ {
return __builtin_aarch64_smaxv2sf (__a, __b); return __builtin_aarch64_fmaxv2sf (__a, __b);
} }
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{ {
return __builtin_aarch64_smaxv4sf (__a, __b); return __builtin_aarch64_fmaxv4sf (__a, __b);
} }
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
{ {
return __builtin_aarch64_smaxv2df (__a, __b); return __builtin_aarch64_fmaxv2df (__a, __b);
} }
/* vmaxv */ /* vmaxv */
...@@ -17818,19 +17818,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b) ...@@ -17818,19 +17818,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vminnm_f32 (float32x2_t __a, float32x2_t __b) vminnm_f32 (float32x2_t __a, float32x2_t __b)
{ {
return __builtin_aarch64_sminv2sf (__a, __b); return __builtin_aarch64_fminv2sf (__a, __b);
} }
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminnmq_f32 (float32x4_t __a, float32x4_t __b) vminnmq_f32 (float32x4_t __a, float32x4_t __b)
{ {
return __builtin_aarch64_sminv4sf (__a, __b); return __builtin_aarch64_fminv4sf (__a, __b);
} }
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminnmq_f64 (float64x2_t __a, float64x2_t __b) vminnmq_f64 (float64x2_t __a, float64x2_t __b)
{ {
return __builtin_aarch64_sminv2df (__a, __b); return __builtin_aarch64_fminv2df (__a, __b);
} }
/* vminv */ /* vminv */
......
2016-07-08 Jiong Wang <jiong.wang@arm.com>
* gcc.target/aarch64/simd/vminmaxnm_1.c: New.
2016-07-08 Michael Meissner <meissner@linux.vnet.ibm.com> 2016-07-08 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/71806 PR target/71806
......
/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-O2" } */
#include "arm_neon.h"
extern void abort ();
#define CHECK(T, N, R, E) \
{\
int i = 0;\
for (; i < N; i++)\
if (* (T *) &R[i] != * (T *) &E[i])\
abort ();\
}
int
main (int argc, char **argv)
{
float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0);
float32x2_t f32x2_exp_maxnm = vdup_n_f32 (0.0);
float32x2_t f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
float32x2_t f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
f32x2_input2 = vdup_n_f32 (1.0);
f32x2_exp_minnm = vdup_n_f32 (1.0);
f32x2_exp_maxnm = vdup_n_f32 (1.0);
f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0);
float32x4_t f32x4_exp_maxnm = vdupq_n_f32 (77.0);
float32x4_t f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
float32x4_t f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
f32x4_input2 = vdupq_n_f32 (-1.0);
f32x4_exp_minnm = vdupq_n_f32 (-1.0);
f32x4_exp_maxnm = vdupq_n_f32 (-1.0);
f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23);
float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56);
float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
f64x2_input2 = vdupq_n_f64 (1.0);
f64x2_exp_minnm = vdupq_n_f64 (1.0);
f64x2_exp_maxnm = vdupq_n_f64 (1.0);
f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment