Commit 1efafef3 by Tamar Christina Committed by James Greenhalgh

[PATCH AArch64] Add more AArch64 NEON intrinsics

Add vmaxnm_f64, vminnm_f64, vmax_f64, vmin_f64.

Committed on behalf of Tamar Christina <tamar.christina@arm.com> .

gcc/

	* config/aarch64/aarch64-simd-builtins.def
	(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
	(__builtin_aarch64_fmaxdf): Likewise.
	(__builtin_aarch64_smin_nandf): Likewise.
	(__builtin_aarch64_smax_nandf): Likewise.
	* config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove.
	* config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to...
	(<fmaxmin><mode>3): ...this.
	* config/aarch64/arm_neon.h (vmaxnm_f64): New.
	(vminnm_f64): Likewise.
	(vmin_f64): Likewise.
	(vmax_f64): Likewise.
	* config/aarch64/iterators.md (FMAXMIN): Merge with...
	(FMAXMIN_UNS): ...this.
	(fmaxmin): Merged with
	(fmaxmin_op): ...this...
	(maxmin_uns_op): ...in to this.

gcc/testsuite/

	* gcc.target/aarch64/vminmaxnm.c: New.
	* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Added float64x1_t
	tests.

From-SVN: r238977
parent 0b953808
2016-08-02 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64-simd-builtins.def
(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
(__builtin_aarch64_fmaxdf): Likewise.
(__builtin_aarch64_smin_nandf): Likewise.
(__builtin_aarch64_smax_nandf): Likewise.
* config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove.
* config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to...
(<fmaxmin><mode>3): ...this.
* config/aarch64/arm_neon.h (vmaxnm_f64): New.
(vminnm_f64): Likewise.
(vmin_f64): Likewise.
(vmax_f64): Likewise.
* config/aarch64/iterators.md (FMAXMIN): Merge with...
(FMAXMIN_UNS): ...this.
(fmaxmin): Merged with
(fmaxmin_op): ...this...
(maxmin_uns_op): ...in to this.
2016-08-01 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
......
......@@ -241,19 +241,19 @@
BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
/* Implemented by <maxmin><mode>3.
/* Implemented by <maxmin_uns><mode>3.
smax variants map to fmaxnm,
smax_nan variants map to fmax. */
BUILTIN_VDQ_BHSI (BINOP, smax, 3)
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
BUILTIN_VHSDF (BINOP, smax_nan, 3)
BUILTIN_VHSDF (BINOP, smin_nan, 3)
BUILTIN_VHSDF_DF (BINOP, smax_nan, 3)
BUILTIN_VHSDF_DF (BINOP, smin_nan, 3)
/* Implemented by <fmaxmin><mode>3. */
BUILTIN_VHSDF (BINOP, fmax, 3)
BUILTIN_VHSDF (BINOP, fmin, 3)
/* Implemented by <maxmin_uns><mode>3. */
BUILTIN_VHSDF_HSDF (BINOP, fmax, 3)
BUILTIN_VHSDF_HSDF (BINOP, fmin, 3)
/* Implemented by aarch64_<maxmin_uns>p<mode>. */
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
......@@ -549,8 +549,4 @@
BUILTIN_GPI (UNOP, fix_truncdf, 2)
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
/* Implemented by <fmaxmin><mode>3. */
VAR1 (BINOP, fmax, 3, hf)
VAR1 (BINOP, fmin, 3, hf)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
\ No newline at end of file
......@@ -2038,6 +2038,9 @@
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
......@@ -2048,17 +2051,6 @@
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
FMAXMIN))]
"TARGET_SIMD"
"<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; 'across lanes' add.
(define_expand "reduc_plus_scal_<mode>"
......
......@@ -4841,14 +4841,16 @@
[(set_attr "type" "f_minmax<s>")]
)
;; Scalar forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF_F16 2 "register_operand" "w")]
FMAXMIN))]
FMAXMIN_UNS))]
"TARGET_FLOAT"
"<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2"
"<maxmin_uns_op>\\t%<s>0, %<s>1, %<s>2"
[(set_attr "type" "f_minmax<stype>")]
)
......
......@@ -17201,6 +17201,14 @@ vmax_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_smax_nanv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmax_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmax_s8 (int8x8_t __a, int8x8_t __b)
{
......@@ -17692,6 +17700,14 @@ vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_fmaxv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmaxnm_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
......@@ -17824,6 +17840,14 @@ vmin_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_smin_nanv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmin_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8 (int8x8_t __a, int8x8_t __b)
{
......@@ -17922,6 +17946,14 @@ vminnm_f32 (float32x2_t __a, float32x2_t __b)
return __builtin_aarch64_fminv2sf (__a, __b);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vminnm_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
{ __builtin_aarch64_fmind (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminnmq_f32 (float32x4_t __a, float32x4_t __b)
{
......
......@@ -1016,9 +1016,8 @@
(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
(define_int_iterator FMAXMIN [UNSPEC_FMAXNM UNSPEC_FMINNM])
(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN
UNSPEC_FMAXNM UNSPEC_FMINNM])
(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
......@@ -1102,7 +1101,9 @@
(UNSPEC_FMAXV "smax_nan")
(UNSPEC_FMIN "smin_nan")
(UNSPEC_FMINNMV "smin")
(UNSPEC_FMINV "smin_nan")])
(UNSPEC_FMINV "smin_nan")
(UNSPEC_FMAXNM "fmax")
(UNSPEC_FMINNM "fmin")])
(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
......@@ -1113,13 +1114,9 @@
(UNSPEC_FMAXV "fmax")
(UNSPEC_FMIN "fmin")
(UNSPEC_FMINNMV "fminnm")
(UNSPEC_FMINV "fmin")])
(define_int_attr fmaxmin [(UNSPEC_FMAXNM "fmax")
(UNSPEC_FMINNM "fmin")])
(define_int_attr fmaxmin_op [(UNSPEC_FMAXNM "fmaxnm")
(UNSPEC_FMINNM "fminnm")])
(UNSPEC_FMINV "fmin")
(UNSPEC_FMAXNM "fmaxnm")
(UNSPEC_FMINNM "fminnm")])
(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
......
2016-08-02 Tamar Christina <tamar.christina@arm.com>
* gcc.target/aarch64/vminmaxnm.c: New.
* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Add float64x1_t
tests.
2016-08-01 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-extract-5.c: New tests to test
......
/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */
/* Test the `v[min|max]{nm}{q}_f*' AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-O2" } */
......@@ -18,6 +18,7 @@ extern void abort ();
int
main (int argc, char **argv)
{
/* v{min|max}nm_f32 normal. */
float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0);
......@@ -28,6 +29,7 @@ main (int argc, char **argv)
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
/* v{min|max}nm_f32 NaN. */
f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
f32x2_input2 = vdup_n_f32 (1.0);
f32x2_exp_minnm = vdup_n_f32 (1.0);
......@@ -38,6 +40,7 @@ main (int argc, char **argv)
CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
/* v{min|max}nmq_f32 normal. */
float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0);
......@@ -48,6 +51,7 @@ main (int argc, char **argv)
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
/* v{min|max}nmq_f32 NaN. */
f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
f32x4_input2 = vdupq_n_f32 (-1.0);
f32x4_exp_minnm = vdupq_n_f32 (-1.0);
......@@ -58,16 +62,57 @@ main (int argc, char **argv)
CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
/* v{min|max}nm_f64 normal. */
float64x1_t f64x1_input1 = vdup_n_f64 (1.23);
float64x1_t f64x1_input2 = vdup_n_f64 (4.56);
float64x1_t f64x1_exp_minnm = vdup_n_f64 (1.23);
float64x1_t f64x1_exp_maxnm = vdup_n_f64 (4.56);
float64x1_t f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2);
float64x1_t f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
/* v{min|max}_f64 normal. */
float64x1_t f64x1_exp_min = vdup_n_f64 (1.23);
float64x1_t f64x1_exp_max = vdup_n_f64 (4.56);
float64x1_t f64x1_ret_min = vmin_f64 (f64x1_input1, f64x1_input2);
float64x1_t f64x1_ret_max = vmax_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_min, f64x1_exp_min);
CHECK (uint64_t, 1, f64x1_ret_max, f64x1_exp_max);
/* v{min|max}nmq_f64 normal. */
float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23);
float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56);
float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
/* v{min|max}nm_f64 NaN. */
f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
f64x1_input2 = vdup_n_f64 (1.0);
f64x1_exp_minnm = vdup_n_f64 (1.0);
f64x1_exp_maxnm = vdup_n_f64 (1.0);
f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2);
f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
/* v{min|max}_f64 NaN. */
f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
f64x1_input2 = vdup_n_f64 (1.0);
f64x1_exp_minnm = vdup_n_f64 (-__builtin_nanf (""));
f64x1_exp_maxnm = vdup_n_f64 (-__builtin_nanf (""));
f64x1_ret_minnm = vmin_f64 (f64x1_input1, f64x1_input2);
f64x1_ret_maxnm = vmax_f64 (f64x1_input1, f64x1_input2);
CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
/* v{min|max}nmq_f64 NaN. */
f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
f64x2_input2 = vdupq_n_f64 (1.0);
f64x2_exp_minnm = vdupq_n_f64 (1.0);
......
/* { dg-do compile } */
/* { dg-options "-O2" } */
#include "arm_neon.h"
/* For each of these intrinsics, we map directly to an unspec in RTL.
We're just using the argument directly and returning the result, so we
can precisely specify the exact instruction pattern and register
allocations we expect. */
float64x1_t
test_vmaxnm_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fmaxnm\td0, d0, d1" 1 } } */
return vmaxnm_f64 (a, b);
}
float64x1_t
test_vminnm_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fminnm\td0, d0, d1" 1 } } */
return vminnm_f64 (a, b);
}
float64x1_t
test_vmax_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fmax\td0, d0, d1" 1 } } */
return vmax_f64 (a, b);
}
float64x1_t
test_vmin_f64 (float64x1_t a, float64x1_t b)
{
/* { dg-final { scan-assembler-times "fmin\td0, d0, d1" 1 } } */
return vmin_f64 (a, b);
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment