[AArch64] Use fmin/fmax for v[min|max]nm{q} intrinsics

smin/smax will actually honor quiet NaN. gcc/ * config/aarch64/aarch64-simd-builtins.def (smax): Remove float variants. (smin): Likewise. (fmax): New entry. (fmin): Likewise. * config/aarch64/arm_neon.h (vmaxnm_f32): Use __builtin_aarch64_fmaxv2sf. (vmaxnmq_f32): Likewise. (vmaxnmq_f64): Likewise. (vminnm_f32): Likewise. (vminnmq_f32): Likewise. (vminnmq_f64): Likewise. gcc/testsuite/ * gcc.target/aarch64/simd/vminmaxnm_1.c: New. From-SVN: r238166

[AArch64] Use fmin/fmax for v[min|max]nm{q} intrinsics
smin/smax will actually honor quiet NaN. gcc/ * config/aarch64/aarch64-simd-builtins.def (smax): Remove float variants. (smin): Likewise. (fmax): New entry. (fmin): Likewise. * config/aarch64/arm_neon.h (vmaxnm_f32): Use __builtin_aarch64_fmaxv2sf. (vmaxnmq_f32): Likewise. (vmaxnmq_f64): Likewise. (vminnm_f32): Likewise. (vminnmq_f32): Likewise. (vminnmq_f64): Likewise. gcc/testsuite/ * gcc.target/aarch64/simd/vminmaxnm_1.c: New. From-SVN: r238166
8beb9a0d · Jiong Wang · Jiong Wang · cef4b650 · 8beb9a0d · 8beb9a0d
Commit 8beb9a0d authored Jul 08, 2016 by Jiong Wang Committed by Jiong Wang Jul 08, 2016
Showing with 113 additions and 8 deletions

gcc/ChangeLog
+15 -0

gcc/config/aarch64/aarch64-simd-builtins.def
+6 -2

gcc/config/aarch64/arm_neon.h
+6 -6

gcc/testsuite/ChangeLog
+4 -0

gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
+82 -0

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2016-07-08  Jiong Wang  <jiong.wang@arm.com>
+	* config/aarch64/aarch64-simd-builtins.def (smax): Remove float
+	variants.
+	(smin): Likewise.
+	(fmax): New entry.
+	(fmin): Likewise.
+	* config/aarch64/arm_neon.h (vmaxnm_f32): Use
+	__builtin_aarch64_fmaxv2sf.
+	(vmaxnmq_f32): Likewise.
+	(vmaxnmq_f64): Likewise.
+	(vminnm_f32): Likewise.
+	(vminnmq_f32): Likewise.
+	(vminnmq_f64): Likewise.
 2016-07-08  Michael Meissner  <meissner@linux.vnet.ibm.com>
 	PR target/71806

--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -244,13 +244,17 @@
  /* Implemented by <maxmin><mode>3.
     smax variants map to fmaxnm,
     smax_nan variants map to fmax.  */
-  BUILTIN_VDQIF (BINOP, smax, 3)
+  BUILTIN_VDQ_BHSI (BINOP, smax, 3)
-  BUILTIN_VDQIF (BINOP, smin, 3)
+  BUILTIN_VDQ_BHSI (BINOP, smin, 3)
  BUILTIN_VDQ_BHSI (BINOP, umax, 3)
  BUILTIN_VDQ_BHSI (BINOP, umin, 3)
  BUILTIN_VDQF (BINOP, smax_nan, 3)
  BUILTIN_VDQF (BINOP, smin_nan, 3)
+  /* Implemented by <fmaxmin><mode>3.  */
+  BUILTIN_VDQF (BINOP, fmax, 3)
+  BUILTIN_VDQF (BINOP, fmin, 3)
  /* Implemented by aarch64_<maxmin_uns>p<mode>.  */
  BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
  BUILTIN_VDQ_BHSI (BINOP, sminp, 0)

--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17588,19 +17588,19 @@ vpminnms_f32 (float32x2_t a)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smaxv2sf (__a, __b);
+  return __builtin_aarch64_fmaxv2sf (__a, __b);
 }
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smaxv4sf (__a, __b);
+  return __builtin_aarch64_fmaxv4sf (__a, __b);
 }
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smaxv2df (__a, __b);
+  return __builtin_aarch64_fmaxv2df (__a, __b);
 }
 /* vmaxv  */
@@ -17818,19 +17818,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vminnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_sminv2sf (__a, __b);
+  return __builtin_aarch64_fminv2sf (__a, __b);
 }
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_sminv4sf (__a, __b);
+  return __builtin_aarch64_fminv4sf (__a, __b);
 }
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_sminv2df (__a, __b);
+  return __builtin_aarch64_fminv2df (__a, __b);
 }
 /* vminv  */

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
+2016-07-08  Jiong Wang  <jiong.wang@arm.com>
+	* gcc.target/aarch64/simd/vminmaxnm_1.c: New.
 2016-07-08  Michael Meissner  <meissner@linux.vnet.ibm.com>
 	PR target/71806

--- a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
+/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic.  */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+#include "arm_neon.h"
+extern void abort ();
+#define CHECK(T, N, R, E) \
+  {\
+    int i = 0;\
+    for (; i < N; i++)\
+      if (* (T *) &R[i] != * (T *) &E[i])\
+	abort ();\
+  }
+int
+main (int argc, char **argv)
+{
+  float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
+  float32x2_t f32x2_exp_minnm  = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_exp_maxnm  = vdup_n_f32 (0.0);
+  float32x2_t f32x2_ret_minnm  = vminnm_f32 (f32x2_input1, f32x2_input2);
+  float32x2_t f32x2_ret_maxnm  = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+  f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
+  f32x2_input2 = vdup_n_f32 (1.0);
+  f32x2_exp_minnm  = vdup_n_f32 (1.0);
+  f32x2_exp_maxnm  = vdup_n_f32 (1.0);
+  f32x2_ret_minnm  = vminnm_f32 (f32x2_input1, f32x2_input2);
+  f32x2_ret_maxnm  = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+  float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_exp_minnm  = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_exp_maxnm  = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_ret_minnm  = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  float32x4_t f32x4_ret_maxnm  = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+  f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
+  f32x4_input2 = vdupq_n_f32 (-1.0);
+  f32x4_exp_minnm  = vdupq_n_f32 (-1.0);
+  f32x4_exp_maxnm  = vdupq_n_f32 (-1.0);
+  f32x4_ret_minnm  = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  f32x4_ret_maxnm  = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+  float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_exp_minnm  = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_exp_maxnm  = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_ret_minnm  = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  float64x2_t f64x2_ret_maxnm  = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+  f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
+  f64x2_input2 = vdupq_n_f64 (1.0);
+  f64x2_exp_minnm  = vdupq_n_f64 (1.0);
+  f64x2_exp_maxnm  = vdupq_n_f64 (1.0);
+  f64x2_ret_minnm  = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  f64x2_ret_maxnm  = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+  return 0;
+}