Commit 98b3a5f2 by Jiong Wang Committed by Jiong Wang

[AArch64, 1/4] Add the missing support of vfms_n_f32, vfmsq_n_f32, vfmsq_n_f64

gcc/
	* config/aarch64/aarch64-simd.md (*aarch64_fma4_elt_to_128df): Rename
	to *aarch64_fma4_elt_from_dup<mode>.
	(*aarch64_fnma4_elt_to_128df): Rename to
	*aarch64_fnma4_elt_from_dup<mode>.
	* config/aarch64/arm_neon.h (vfma_n_f64): New.
	(vfms_n_f32): Likewise.
	(vfms_n_f64): Likewise.
	(vfmsq_n_f32): Likewise.
	(vfmsq_n_f64): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/fmla_intrinsic_1.c: Allow ".d[index]" besides
	".2d[index]" when scanning the assembly.
	* gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h: New entry for
	float64x1.
	* gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c: New.

From-SVN: r236331
parent 92237f43
2016-05-17 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd.md (*aarch64_fma4_elt_to_128df): Rename
to *aarch64_fma4_elt_from_dup<mode>.
(*aarch64_fnma4_elt_to_128df): Rename to
*aarch64_fnma4_elt_from_dup<mode>.
* config/aarch64/arm_neon.h (vfma_n_f64): New.
(vfms_n_f32): Likewise.
(vfms_n_f64): Likewise.
(vfmsq_n_f32): Likewise.
(vfmsq_n_f64): Likewise.
2016-05-17 Gerald Pfeifer <gerald@pfeifer.com>
* wide-int.h: Change fixed_wide_int_storage from class to struct.
......
......@@ -1579,16 +1579,16 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_fma4_elt_to_128df"
[(set (match_operand:V2DF 0 "register_operand" "=w")
(fma:V2DF
(vec_duplicate:V2DF
(match_operand:DF 1 "register_operand" "w"))
(match_operand:V2DF 2 "register_operand" "w")
(match_operand:V2DF 3 "register_operand" "0")))]
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(vec_duplicate:VMUL
(match_operand:<VEL> 1 "register_operand" "w"))
(match_operand:VMUL 2 "register_operand" "w")
(match_operand:VMUL 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmla\\t%0.2d, %2.2d, %1.2d[0]"
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
"fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
[(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_fma4_elt_to_64v2df"
......@@ -1656,17 +1656,17 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_fnma4_elt_to_128df"
[(set (match_operand:V2DF 0 "register_operand" "=w")
(fma:V2DF
(neg:V2DF
(match_operand:V2DF 2 "register_operand" "w"))
(vec_duplicate:V2DF
(match_operand:DF 1 "register_operand" "w"))
(match_operand:V2DF 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmls\\t%0.2d, %2.2d, %1.2d[0]"
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Negated fused multiply-add in which one multiplicand is a scalar
;; register duplicated across the vector.  The pattern is instantiated
;; for every mode of the VMUL iterator, with <VEL> as the mode of the
;; scalar operand.  The result is
;;   fma (neg (operand 2), vec_duplicate (operand 1), operand 3)
;; and operand 3 is tied to the destination by its "0" constraint.
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(neg:VMUL
(match_operand:VMUL 2 "register_operand" "w"))
(vec_duplicate:VMUL
(match_operand:<VEL> 1 "register_operand" "w"))
(match_operand:VMUL 3 "register_operand" "0")))]
"TARGET_SIMD"
;; Operand 1 is a scalar register, so the template always selects
;; element [0] of it as the by-element multiplier.
"fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
[(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_fnma4_elt_to_64v2df"
......
......@@ -14456,6 +14456,12 @@ vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a);
}
/* Fused multiply-add by scalar on a one-element double vector: returns
   { __a[0] + __b[0] * __c }.

   __builtin_fma is used instead of the plain expression
   `__b[0] * __c + __a[0]' because the latter is only turned into a fused
   operation when floating-point contraction is enabled; with contraction
   disabled it would round the product and the sum separately.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfma_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c)
{
  return (float64x1_t) {__builtin_fma (__b[0], __c, __a[0])};
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
......@@ -14597,6 +14603,29 @@ vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
return __builtin_aarch64_fmav2df (-__b, __c, __a);
}
/* Multiply-subtract by scalar for float32x2_t.  The multiplicand __b is
   negated, __c is expanded to a vector with vdup_n_f32, and both are
   handed to the V2SF fma builtin with __a as the addend.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
{
  float32x2_t __neg_b = -__b;
  float32x2_t __c_dup = vdup_n_f32 (__c);

  return __builtin_aarch64_fmav2sf (__neg_b, __c_dup, __a);
}
/* Fused multiply-subtract by scalar on a one-element double vector:
   returns { __a[0] - __b[0] * __c }.

   The negated multiplicand is passed to __builtin_fma so the result is
   rounded once.  The plain expression `-__b[0] * __c + __a[0]' is only
   fused when floating-point contraction is enabled, and would otherwise
   round the product and the sum separately.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfms_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c)
{
  return (float64x1_t) {__builtin_fma (-__b[0], __c, __a[0])};
}
/* Multiply-subtract by scalar for float32x4_t.  Passes the negated __b,
   the vector built from __c by vdupq_n_f32, and the addend __a to the
   V4SF fma builtin.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
  float32x4_t __neg_b = -__b;
  float32x4_t __c_dup = vdupq_n_f32 (__c);

  return __builtin_aarch64_fmav4sf (__neg_b, __c_dup, __a);
}
/* Multiply-subtract by scalar for float64x2_t.  Passes the negated __b,
   the vector built from __c by vdupq_n_f64, and the addend __a to the
   V2DF fma builtin.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c)
{
  float64x2_t __neg_b = -__b;
  float64x2_t __c_dup = vdupq_n_f64 (__c);

  return __builtin_aarch64_fmav2df (__neg_b, __c_dup, __a);
}
/* vfms_lane */
......
2016-05-17 Jiong Wang <jiong.wang@arm.com>
* gcc.target/aarch64/fmla_intrinsic_1.c: Allow ".d[index]" besides
".2d[index]" when scanning the assembly.
* gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h: New entry for
float64x1.
* gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c: New.
2016-05-17 Richard Biener <rguenther@suse.de>
PR tree-optimization/71132
......
......@@ -136,6 +136,7 @@ static ARRAY(result, poly, 16, 4);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
static ARRAY(result, float, 16, 4);
#endif
static ARRAY(result, float, 64, 1);
static ARRAY(result, float, 32, 2);
static ARRAY(result, int, 8, 16);
static ARRAY(result, int, 16, 8);
......@@ -169,6 +170,7 @@ extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, hfloat, 64, 1);
extern ARRAY(expected, int, 8, 16);
extern ARRAY(expected, int, 16, 8);
extern ARRAY(expected, int, 32, 4);
......
......@@ -110,6 +110,6 @@ main (int argc, char **argv)
/* vfmaq_lane_f64.
vfma_laneq_f64.
vfmaq_laneq_f64. */
/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */
/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2?d\\\[\[0-9\]+\\\]" 3 } } */
......@@ -111,6 +111,6 @@ main (int argc, char **argv)
/* vfmsq_lane_f64.
vfms_laneq_f64.
vfmsq_laneq_f64. */
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */
/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2?d\\\[\[0-9\]+\\\]" 3 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment