Arm: Update Armv8.4-a's FP16 FML intrinsics

This patch updates the Armv8.4-a FP16 FML intrinsics's suffixes from u32 to f16 to be more consistent with the naming convention for intrinsics. The specifications for these intrinsics have not been published yet so we do not need to maintain the old names. The patch was created with the following script: grep -lIE "(vfml[as].+)_u32" -r gcc/ | grep -iEv ".+Changelog.*" \ | xargs sed -i -E -e "s/(vfml[as].+)_u32/\1_f16/g" gcc/ChangeLog: * config/arm/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32, vfmlal_high_u32, vfmlsl_high_u32, vfmlalq_low_u32, vfmlslq_low_u32, vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32, vfmlal_lane_high_u32, vfmlalq_laneq_low_u32, vfmlalq_lane_low_u32, vfmlal_laneq_low_u32, vfmlalq_laneq_high_u32, vfmlalq_lane_high_u32, vfmlal_laneq_high_u32, vfmlsl_lane_low_u32, vfmlsl_lane_high_u32, vfmlslq_laneq_low_u32, vfmlslq_lane_low_u32, vfmlsl_laneq_low_u32, vfmlslq_laneq_high_u32, vfmlslq_lane_high_u32, vfmlsl_laneq_high_u32): Rename ... (vfmlal_low_f16, vfmlsl_low_f16, vfmlal_high_f16, vfmlsl_high_f16, vfmlalq_low_f16, vfmlslq_low_f16, vfmlalq_high_f16, vfmlslq_high_f16, vfmlal_lane_low_f16, vfmlal_lane_high_f16, vfmlalq_laneq_low_f16, vfmlalq_lane_low_f16, vfmlal_laneq_low_f16, vfmlalq_laneq_high_f16, vfmlalq_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_lane_low_f16, vfmlsl_lane_high_f16, vfmlslq_laneq_low_f16, vfmlslq_lane_low_f16, vfmlsl_laneq_low_f16, vfmlslq_laneq_high_f16, vfmlslq_lane_high_f16, vfmlsl_laneq_high_f16): ... To this. * config/arm/neon.md: Update comments. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/fp16fml_high.c (test_vfmlal_high_u32, test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32): Rename .... (test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16, test_vfmlslq_high_f16): ... To this. * gcc.target/arm/simd/fp16fml_lane_high.c (test_vfmlal_lane_high_u32, tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32, test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32, test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32, test_vfmlslq_laneq_high_u32): Rename ... (test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16, test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16, test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16, test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this. * gcc.target/arm/simd/fp16fml_lane_low.c (test_vfmlal_lane_low_u32, test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32, test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32, test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32, test_vfmlslq_laneq_low_u32): Rename ... (test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16, test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16, test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16, test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this. * gcc.target/arm/simd/fp16fml_low.c (test_vfmlal_low_u32, test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32): Rename ... (test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16, test_vfmlslq_low_f16): ... To this. From-SVN: r269192

Arm: Update Armv8.4-a's FP16 FML intrinsics
This patch updates the Armv8.4-a FP16 FML intrinsics's suffixes from u32 to f16 to be more consistent with the naming convention for intrinsics. The specifications for these intrinsics have not been published yet so we do not need to maintain the old names. The patch was created with the following script: grep -lIE "(vfml[as].+)_u32" -r gcc/ | grep -iEv ".+Changelog.*" \ | xargs sed -i -E -e "s/(vfml[as].+)_u32/\1_f16/g" gcc/ChangeLog: * config/arm/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32, vfmlal_high_u32, vfmlsl_high_u32, vfmlalq_low_u32, vfmlslq_low_u32, vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32, vfmlal_lane_high_u32, vfmlalq_laneq_low_u32, vfmlalq_lane_low_u32, vfmlal_laneq_low_u32, vfmlalq_laneq_high_u32, vfmlalq_lane_high_u32, vfmlal_laneq_high_u32, vfmlsl_lane_low_u32, vfmlsl_lane_high_u32, vfmlslq_laneq_low_u32, vfmlslq_lane_low_u32, vfmlsl_laneq_low_u32, vfmlslq_laneq_high_u32, vfmlslq_lane_high_u32, vfmlsl_laneq_high_u32): Rename ... (vfmlal_low_f16, vfmlsl_low_f16, vfmlal_high_f16, vfmlsl_high_f16, vfmlalq_low_f16, vfmlslq_low_f16, vfmlalq_high_f16, vfmlslq_high_f16, vfmlal_lane_low_f16, vfmlal_lane_high_f16, vfmlalq_laneq_low_f16, vfmlalq_lane_low_f16, vfmlal_laneq_low_f16, vfmlalq_laneq_high_f16, vfmlalq_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_lane_low_f16, vfmlsl_lane_high_f16, vfmlslq_laneq_low_f16, vfmlslq_lane_low_f16, vfmlsl_laneq_low_f16, vfmlslq_laneq_high_f16, vfmlslq_lane_high_f16, vfmlsl_laneq_high_f16): ... To this. * config/arm/neon.md: Update comments. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/fp16fml_high.c (test_vfmlal_high_u32, test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32): Rename .... (test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16, test_vfmlslq_high_f16): ... To this. * gcc.target/arm/simd/fp16fml_lane_high.c (test_vfmlal_lane_high_u32, tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32, test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32, test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32, test_vfmlslq_laneq_high_u32): Rename ... (test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16, test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16, test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16, test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this. * gcc.target/arm/simd/fp16fml_lane_low.c (test_vfmlal_lane_low_u32, test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32, test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32, test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32, test_vfmlslq_laneq_low_u32): Rename ... (test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16, test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16, test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16, test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this. * gcc.target/arm/simd/fp16fml_low.c (test_vfmlal_low_u32, test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32): Rename ... (test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16, test_vfmlslq_low_f16): ... To this. From-SVN: r269192
99cf78cf · Tamar Christina · Tamar Christina · 9d04c986 · 99cf78cf · 99cf78cf
Commit 99cf78cf authored Feb 25, 2019 by Tamar Christina Committed by Tamar Christina Feb 25, 2019
8 changed files
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
 2019-02-25  Tamar Christina  <tamar.christina@arm.com>

+	* config/arm/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32,
+	vfmlal_high_u32, vfmlsl_high_u32, vfmlalq_low_u32, vfmlslq_low_u32,
+	vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32,
+	vfmlal_lane_high_u32, vfmlalq_laneq_low_u32, vfmlalq_lane_low_u32,
+	vfmlal_laneq_low_u32, vfmlalq_laneq_high_u32, vfmlalq_lane_high_u32,
+	vfmlal_laneq_high_u32, vfmlsl_lane_low_u32, vfmlsl_lane_high_u32,
+	vfmlslq_laneq_low_u32, vfmlslq_lane_low_u32, vfmlsl_laneq_low_u32,
+	vfmlslq_laneq_high_u32, vfmlslq_lane_high_u32, vfmlsl_laneq_high_u32):
+	Rename ...
+	(vfmlal_low_f16, vfmlsl_low_f16, vfmlal_high_f16, vfmlsl_high_f16,
+	vfmlalq_low_f16, vfmlslq_low_f16, vfmlalq_high_f16, vfmlslq_high_f16,
+	vfmlal_lane_low_f16, vfmlal_lane_high_f16, vfmlalq_laneq_low_f16,
+	vfmlalq_lane_low_f16, vfmlal_laneq_low_f16, vfmlalq_laneq_high_f16,
+	vfmlalq_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_lane_low_f16,
+	vfmlsl_lane_high_f16, vfmlslq_laneq_low_f16, vfmlslq_lane_low_f16,
+	vfmlsl_laneq_low_f16, vfmlslq_laneq_high_f16, vfmlslq_lane_high_f16,
+	vfmlsl_laneq_high_f16): ... To this.
+	* config/arm/neon.md: Update comments.
+
+2019-02-25  Tamar Christina  <tamar.christina@arm.com>
+
 	* config/aarch64/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32,
 	vfmlalq_low_u32, vfmlslq_low_u32, vfmlal_high_u32, vfmlsl_high_u32,
 	vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32,

--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -18106,63 +18106,63 @@ vdotq_lane_s32 (int32x4_t __r, int8x16_t __a, int8x8_t __b, const int __index)

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
  return __builtin_neon_vfmal_lowv2sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
  return __builtin_neon_vfmsl_lowv2sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
  return __builtin_neon_vfmal_highv2sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
  return __builtin_neon_vfmsl_highv2sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
  return __builtin_neon_vfmal_lowv4sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
  return __builtin_neon_vfmsl_lowv4sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
  return __builtin_neon_vfmal_highv4sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
  return __builtin_neon_vfmsl_highv4sf (__r, __a, __b);
 }

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		     const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18171,7 +18171,7 @@ vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		      const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18180,7 +18180,7 @@ vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlalq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (8, __index);
@@ -18189,7 +18189,7 @@ vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18198,7 +18198,7 @@ vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (8, __index);
@@ -18207,7 +18207,7 @@ vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 			const int __index)
 {
  __builtin_arm_lane_check (8, __index);
@@ -18216,7 +18216,7 @@ vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18225,7 +18225,7 @@ vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (8, __index);
@@ -18234,7 +18234,7 @@ vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		     const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18243,7 +18243,7 @@ vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		      const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18252,7 +18252,7 @@ vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (8, __index);
@@ -18261,7 +18261,7 @@ vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18270,7 +18270,7 @@ vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (8, __index);
@@ -18279,7 +18279,7 @@ vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 			const int __index)
 {
  __builtin_arm_lane_check (8, __index);
@@ -18288,7 +18288,7 @@ vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,

 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (4, __index);
@@ -18297,7 +18297,7 @@ vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,

 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		       const int __index)
 {
  __builtin_arm_lane_check (8, __index);

--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2473,8 +2473,8 @@
 })

 ;; Used to implement the intrinsics:
-;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
-;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
+;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
+;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
 ;; Needs a bit of care to get the modes of the different sub-expressions right
 ;; due to 'a' and 'b' having different sizes and make sure we use the right
 ;; S or D subregister to select the appropriate lane from.
@@ -2510,8 +2510,8 @@
 )

 ;; Used to implement the intrinsics:
-;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
-;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
+;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
+;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
 ;; Needs a bit of care to get the modes of the different sub-expressions right
 ;; due to 'a' and 'b' having different sizes and make sure we use the right
 ;; S or D subregister to select the appropriate lane from.
@@ -2607,8 +2607,8 @@
 )

 ;; Used to implement the intrinsics:
-;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
-;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
+;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
+;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
 ;; Needs a bit of care to get the modes of the different sub-expressions right
 ;; due to 'a' and 'b' having different sizes and make sure we use the right
 ;; S or D subregister to select the appropriate lane from.
@@ -2645,8 +2645,8 @@
 )

 ;; Used to implement the intrinsics:
-;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
-;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
+;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
+;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
 ;; Needs a bit of care to get the modes of the different sub-expressions right
 ;; due to 'a' and 'b' having different sizes and make sure we use the right
 ;; S or D subregister to select the appropriate lane from.

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
 2019-02-25  Tamar Christina  <tamar.christina@arm.com>

+	* gcc.target/arm/simd/fp16fml_high.c (test_vfmlal_high_u32,
+	test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32):
+	Rename ....
+	(test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16,
+	test_vfmlslq_high_f16): ... To this.
+	* gcc.target/arm/simd/fp16fml_lane_high.c (test_vfmlal_lane_high_u32,
+	tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32,
+	test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32,
+	test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32,
+	test_vfmlslq_laneq_high_u32): Rename ...
+	(test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16,
+	test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16,
+	test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16,
+	test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this.
+	* gcc.target/arm/simd/fp16fml_lane_low.c (test_vfmlal_lane_low_u32,
+	test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32,
+	test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32,
+	test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32,
+	test_vfmlslq_laneq_low_u32): Rename ...
+	(test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16,
+	test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16,
+	test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16,
+	test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this.
+	* gcc.target/arm/simd/fp16fml_low.c (test_vfmlal_low_u32,
+	test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32):
+	Rename ...
+	(test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16,
+	test_vfmlslq_low_f16): ... To this.
+
+2019-02-25  Tamar Christina  <tamar.christina@arm.com>
+
 	* gcc.target/aarch64/fp16_fmul_high.h (test_vfmlal_high_u32,
 	test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32):
 	Rename ...

--- a/gcc/testsuite/gcc.target/arm/simd/fp16fml_high.c
+++ b/gcc/testsuite/gcc.target/arm/simd/fp16fml_high.c
@@ -5,27 +5,27 @@
 #include "arm_neon.h"

 float32x2_t
-test_vfmlal_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_high_u32 (r, a, b);
+  return vfmlal_high_f16 (r, a, b);
 }

 float32x4_t
-test_vfmlalq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_high_u32 (r, a, b);
+  return vfmlalq_high_f16 (r, a, b);
 }

 float32x2_t
-test_vfmlsl_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_high_u32 (r, a, b);
+  return vfmlsl_high_f16 (r, a, b);
 }

 float32x4_t
-test_vfmlslq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_high_u32 (r, a, b);
+  return vfmlslq_high_f16 (r, a, b);
 }

 /* { dg-final { scan-assembler-times {vfmal.f16\td[0-9]+, s[123]?[13579], s[123]?[13579]} 1 } } */

--- a/gcc/testsuite/gcc.target/arm/simd/fp16fml_lane_high.c
+++ b/gcc/testsuite/gcc.target/arm/simd/fp16fml_lane_high.c
@@ -5,51 +5,51 @@
 #include "arm_neon.h"

 float32x2_t
-test_vfmlal_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_lane_high_u32 (r, a, b, 0);
+  return vfmlal_lane_high_f16 (r, a, b, 0);
 }

 float32x2_t
-tets_vfmlsl_lane_high_u32  (float32x2_t r, float16x4_t a, float16x4_t b)
+tets_vfmlsl_lane_high_f16  (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_lane_high_u32 (r, a, b, 0);
+  return vfmlsl_lane_high_f16 (r, a, b, 0);
 }

 float32x2_t
-test_vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlal_laneq_high_u32 (r, a, b, 6);
+  return vfmlal_laneq_high_f16 (r, a, b, 6);
 }

 float32x2_t
-test_vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlsl_laneq_high_u32 (r, a, b, 6);
+  return vfmlsl_laneq_high_f16 (r, a, b, 6);
 }

 float32x4_t
-test_vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlalq_lane_high_u32 (r, a, b, 1);
+  return vfmlalq_lane_high_f16 (r, a, b, 1);
 }

 float32x4_t
-test_vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlslq_lane_high_u32 (r, a, b, 1);
+  return vfmlslq_lane_high_f16 (r, a, b, 1);
 }

 float32x4_t
-test_vfmlalq_laneq_high_u32  (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_laneq_high_f16  (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_laneq_high_u32 (r, a, b, 7);
+  return vfmlalq_laneq_high_f16 (r, a, b, 7);
 }

 float32x4_t
-test_vfmlslq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_laneq_high_u32 (r, a, b, 7);
+  return vfmlslq_laneq_high_f16 (r, a, b, 7);
 }

 /* { dg-final { scan-assembler-times {vfmal.f16\td[0-9]+, s[123]?[13579], s[123]?[02468]\[0\]} 1 } } */

--- a/gcc/testsuite/gcc.target/arm/simd/fp16fml_lane_low.c
+++ b/gcc/testsuite/gcc.target/arm/simd/fp16fml_lane_low.c
@@ -5,51 +5,51 @@
 #include "arm_neon.h"

 float32x2_t
-test_vfmlal_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_lane_low_u32 (r, a, b, 0);
+  return vfmlal_lane_low_f16 (r, a, b, 0);
 }

 float32x2_t
-test_vfmlsl_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_lane_low_u32 (r, a, b, 0);
+  return vfmlsl_lane_low_f16 (r, a, b, 0);
 }

 float32x2_t
-test_vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlal_laneq_low_u32 (r, a, b, 6);
+  return vfmlal_laneq_low_f16 (r, a, b, 6);
 }

 float32x2_t
-test_vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlsl_laneq_low_u32 (r, a, b, 6);
+  return vfmlsl_laneq_low_f16 (r, a, b, 6);
 }

 float32x4_t
-test_vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlalq_lane_low_u32 (r, a, b, 1);
+  return vfmlalq_lane_low_f16 (r, a, b, 1);
 }

 float32x4_t
-test_vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlslq_lane_low_u32 (r, a, b, 1);
+  return vfmlslq_lane_low_f16 (r, a, b, 1);
 }

 float32x4_t
-test_vfmlalq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_laneq_low_u32 (r, a, b, 7);
+  return vfmlalq_laneq_low_f16 (r, a, b, 7);
 }

 float32x4_t
-test_vfmlslq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_laneq_low_u32 (r, a, b, 7);
+  return vfmlslq_laneq_low_f16 (r, a, b, 7);
 }

 /* { dg-final { scan-assembler-times {vfmal.f16\td[0-9]+, s[123]?[02468], s[123]?[02468]\[0\]} 1 } } */

--- a/gcc/testsuite/gcc.target/arm/simd/fp16fml_low.c
+++ b/gcc/testsuite/gcc.target/arm/simd/fp16fml_low.c
@@ -5,27 +5,27 @@
 #include "arm_neon.h"

 float32x2_t
-test_vfmlal_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_low_u32 (r, a, b);
+  return vfmlal_low_f16 (r, a, b);
 }

 float32x4_t
-test_vfmlalq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_low_u32 (r, a, b);
+  return vfmlalq_low_f16 (r, a, b);
 }

 float32x2_t
-test_vfmlsl_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_low_u32 (r, a, b);
+  return vfmlsl_low_f16 (r, a, b);
 }

 float32x4_t
-test_vfmlslq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_low_u32 (r, a, b);
+  return vfmlslq_low_f16 (r, a, b);
 }

 /* { dg-final { scan-assembler-times {vfmal.f16\td[0-9]+, s[123]?[02468], s[123]?[02468]} 1 } } */