Commit bb6131db by Jiong Wang (committed by Jiong Wang)

[AArch64][10/10] ARMv8.2-A FP16 lane scalar intrinsics

gcc/
	* config/aarch64/arm_neon.h (vfmah_lane_f16, vfmah_laneq_f16,
	vfmsh_lane_f16, vfmsh_laneq_f16, vmulh_lane_f16, vmulh_laneq_f16,
	vmulxh_lane_f16, vmulxh_laneq_f16): New.

From-SVN: r238725
parent 9a594ad6
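
For reference, a minimal usage sketch of the new scalar lane intrinsics (not part of this commit; it assumes the usual ACLE semantics where vfmah_f16 (a, b, c) computes a + b * c, and requires an ARMv8.2-A FP16 target, e.g. -march=armv8.2-a+fp16):

#include <arm_neon.h>

/* Accumulate x against lane 2 of coeffs, then scale by lane 0.  */
float16_t
fma_with_lane (float16_t acc, float16_t x, float16x4_t coeffs)
{
  acc = vfmah_lane_f16 (acc, x, coeffs, 2);   /* acc + x * coeffs[2] */
  return vmulh_lane_f16 (acc, coeffs, 0);     /* acc * coeffs[0] */
}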
2016-07-25  Jiong Wang  <jiong.wang@arm.com>
* config/aarch64/arm_neon.h (vfmah_lane_f16, vfmah_laneq_f16,
vfmsh_lane_f16, vfmsh_laneq_f16, vmulh_lane_f16, vmulh_laneq_f16,
vmulxh_lane_f16, vmulxh_laneq_f16): New.
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (fma, fnma): Support HF.
* config/aarch64/arm_fp16.h (vfmah_f16, vfmsh_f16): New.
...
@@ -26777,6 +26777,20 @@ vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)

/* ARMv8.2-A FP16 lane vector intrinsics. */

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vfmah_lane_f16 (float16_t __a, float16_t __b,
                float16x4_t __c, const int __lane)
{
  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
}

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vfmah_laneq_f16 (float16_t __a, float16_t __b,
                 float16x8_t __c, const int __lane)
{
  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vfma_lane_f16 (float16x4_t __a, float16x4_t __b,
               float16x4_t __c, const int __lane)
@@ -26817,6 +26831,20 @@ vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
  return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c));
}

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vfmsh_lane_f16 (float16_t __a, float16_t __b,
                float16x4_t __c, const int __lane)
{
  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
}

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vfmsh_laneq_f16 (float16_t __a, float16_t __b,
                 float16x8_t __c, const int __lane)
{
  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vfms_lane_f16 (float16x4_t __a, float16x4_t __b,
               float16x4_t __c, const int __lane)
@@ -26857,6 +26885,12 @@ vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
  return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c));
}

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
{
@@ -26869,6 +26903,12 @@ vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
  return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
}

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
{
  return __a * __aarch64_vget_lane_any (__b, __lane);
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
{
@@ -26893,6 +26933,12 @@ vmulq_n_f16 (float16x8_t __a, float16_t __b)
  return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0);
}

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
{
  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
{
@@ -26905,6 +26951,12 @@ vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
  return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane));
}

__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
{
  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
{
...
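
Usage note (a sketch, not part of the commit): the _lane forms index a 64-bit float16x4_t (lanes 0-3), while the _laneq forms index a 128-bit float16x8_t (lanes 0-7); the lane argument must be a compile-time constant. Assuming the usual ACLE semantics (vfmsh_f16 (a, b, c) computes a - b * c, and vmulxh_f16 maps to FMULX):

#include <arm_neon.h>

float16_t
lane_demo (float16_t x, float16x4_t lo, float16x8_t hi)
{
  float16_t a = vmulxh_lane_f16 (x, lo, 3);    /* x FMULX lo[3] */
  float16_t b = vmulxh_laneq_f16 (x, hi, 7);   /* x FMULX hi[7] */
  return vfmsh_laneq_f16 (a, b, hi, 0);        /* a - b * hi[0] */
}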