Commit 68ad28c3 by Jiong Wang Committed by Jiong Wang

[AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics

gcc/
	* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
	* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
	New.
	(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
	(add<mode>3): Likewise.
	(sub<mode>3): Likewise.
	(mul<mode>3): Likewise.
	(div<mode>3): Likewise.
	(*div<mode>3): Likewise.
	(<fmaxmin><mode>3): Extend to HF.
	* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
	(fabd<mode>3): Likewise.
	(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
	(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
	(aarch64_fmulx<mode>): Likewise.
	(aarch64_fac<optab><mode>): Likewise.
	(aarch64_frecps<mode>): Likewise.
	(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
	(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
	* config/aarch64/iterators.md (VHSDF_SDF): Delete.
	(VSDQ_HSDI): Support HI.
	(fcvt_target, FCVT_TARGET): Likewise.
	* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
	vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
	vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
	vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
	vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
	vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
	vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
	vrsqrtsh_f16): New.

From-SVN: r238723
parent d7f33f07
2016-07-25 Jiong Wang <jiong.wang@arm.com> 2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
New.
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
(add<mode>3): Likewise.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
(<fmaxmin><mode>3): Extend to HF.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_fac<optab><mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
vrsqrtsh_f16): New.
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config.gcc (aarch64*-*-*): Install arm_fp16.h. * config.gcc (aarch64*-*-*): Install arm_fp16.h.
* config/aarch64/aarch64-builtins.c (hi_UP): New. * config/aarch64/aarch64-builtins.c (hi_UP): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64-simd-builtins.def: Register new builtins.
...@@ -11,6 +44,7 @@ ...@@ -11,6 +44,7 @@
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise. (l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
(fix_trunc<GPF:mode><GPI:mode>2): Likewise. (fix_trunc<GPF:mode><GPI:mode>2): Likewise.
(sqrt<mode>2): Likewise. (sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(abs<mode>2): Likewise. (abs<mode>2): Likewise.
(<optab><mode>hf2): New pattern for HF mode. (<optab><mode>hf2): New pattern for HF mode.
(<optab>hihf2): Likewise. (<optab>hihf2): Likewise.
...@@ -58,7 +92,7 @@ ...@@ -58,7 +92,7 @@
(f, fp): Support HF modes. (f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16, * config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16, vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16, vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16, vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16, vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New. vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
...@@ -159,7 +193,7 @@ ...@@ -159,7 +193,7 @@
and V8HFmode. and V8HFmode.
* config/aarch64/arm_neon.h (__INTERLEAVE_LIST): Support float16x4_t, * config/aarch64/arm_neon.h (__INTERLEAVE_LIST): Support float16x4_t,
float16x8_t. float16x8_t.
(__aarch64_vdup_lane_f16, __aarch64_vdup_laneq_f16, (__aarch64_vdup_lane_f16, __aarch64_vdup_laneq_f16,
__aarch64_vdupq_lane_f16, __aarch64_vdupq_laneq_f16, vbsl_f16, __aarch64_vdupq_lane_f16, __aarch64_vdupq_laneq_f16, vbsl_f16,
vbslq_f16, vdup_n_f16, vdupq_n_f16, vdup_lane_f16, vdup_laneq_f16, vbslq_f16, vdup_n_f16, vdupq_n_f16, vdup_lane_f16, vdup_laneq_f16,
vdupq_lane_f16, vdupq_laneq_f16, vduph_lane_f16, vduph_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vduph_lane_f16, vduph_laneq_f16,
......
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
BUILTIN_VDC (COMBINE, combine, 0) BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0) BUILTIN_VB (BINOP, pmul, 0)
BUILTIN_VHSDF_SDF (BINOP, fmulx, 0) BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0)
BUILTIN_VHSDF_DF (UNOP, sqrt, 2) BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0) BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di) VAR1 (UNOP, addp, 0, di)
...@@ -393,13 +393,12 @@ ...@@ -393,13 +393,12 @@
/* Implemented by /* Implemented by
aarch64_frecp<FRECP:frecp_suffix><mode>. */ aarch64_frecp<FRECP:frecp_suffix><mode>. */
BUILTIN_GPF_F16 (UNOP, frecpe, 0) BUILTIN_GPF_F16 (UNOP, frecpe, 0)
BUILTIN_GPF (BINOP, frecps, 0)
BUILTIN_GPF_F16 (UNOP, frecpx, 0) BUILTIN_GPF_F16 (UNOP, frecpx, 0)
BUILTIN_VDQ_SI (UNOP, urecpe, 0) BUILTIN_VDQ_SI (UNOP, urecpe, 0)
BUILTIN_VHSDF (UNOP, frecpe, 0) BUILTIN_VHSDF (UNOP, frecpe, 0)
BUILTIN_VHSDF (BINOP, frecps, 0) BUILTIN_VHSDF_HSDF (BINOP, frecps, 0)
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is /* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
only ever used for the int64x1_t intrinsic, there is no scalar version. */ only ever used for the int64x1_t intrinsic, there is no scalar version. */
...@@ -496,17 +495,23 @@ ...@@ -496,17 +495,23 @@
/* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */ /* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3) BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3) BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
BUILTIN_VHSDF_SDF (SHIFTIMM, fcvtzs, 3) BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3)
BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3) BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3)
VAR1 (SHIFTIMM, scvtfsi, 3, hf)
VAR1 (SHIFTIMM, scvtfdi, 3, hf)
VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf)
VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf)
BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3)
BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3)
/* Implemented by aarch64_rsqrte<mode>. */ /* Implemented by aarch64_rsqrte<mode>. */
BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0) BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */ /* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0) BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0)
/* Implemented by fabd<mode>3. */ /* Implemented by fabd<mode>3. */
BUILTIN_VHSDF_SDF (BINOP, fabd, 3) BUILTIN_VHSDF_HSDF (BINOP, fabd, 3)
/* Implemented by aarch64_faddp<mode>. */ /* Implemented by aarch64_faddp<mode>. */
BUILTIN_VHSDF (BINOP, faddp, 0) BUILTIN_VHSDF (BINOP, faddp, 0)
...@@ -522,10 +527,10 @@ ...@@ -522,10 +527,10 @@
BUILTIN_VHSDF_HSDF (UNOP, neg, 2) BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
/* Implemented by aarch64_fac<optab><mode>. */ /* Implemented by aarch64_fac<optab><mode>. */
BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0) BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0) BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0) BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0) BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0)
/* Implemented by sqrt<mode>2. */ /* Implemented by sqrt<mode>2. */
VAR1 (UNOP, sqrt, 2, hf) VAR1 (UNOP, sqrt, 2, hf)
...@@ -543,3 +548,7 @@ ...@@ -543,3 +548,7 @@
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2) BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
/* Implemented by <fmaxmin><mode>3. */
VAR1 (BINOP, fmax, 3, hf)
VAR1 (BINOP, fmin, 3, hf)
...@@ -391,9 +391,9 @@ ...@@ -391,9 +391,9 @@
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")]) [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "aarch64_rsqrts<mode>" (define_insn "aarch64_rsqrts<mode>"
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w") (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_SDF 2 "register_operand" "w")] (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_RSQRTS))] UNSPEC_RSQRTS))]
"TARGET_SIMD" "TARGET_SIMD"
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
...@@ -475,11 +475,11 @@ ...@@ -475,11 +475,11 @@
) )
(define_insn "fabd<mode>3" (define_insn "fabd<mode>3"
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(abs:VHSDF_SDF (abs:VHSDF_HSDF
(minus:VHSDF_SDF (minus:VHSDF_HSDF
(match_operand:VHSDF_SDF 1 "register_operand" "w") (match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_SDF 2 "register_operand" "w"))))] (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
"TARGET_SIMD" "TARGET_SIMD"
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_fp_abd_<stype><q>")] [(set_attr "type" "neon_fp_abd_<stype><q>")]
...@@ -3078,10 +3078,10 @@ ...@@ -3078,10 +3078,10 @@
;; fmulx. ;; fmulx.
(define_insn "aarch64_fmulx<mode>" (define_insn "aarch64_fmulx<mode>"
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_SDF (unspec:VHSDF_HSDF
[(match_operand:VHSDF_SDF 1 "register_operand" "w") [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_SDF 2 "register_operand" "w")] (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_FMULX))] UNSPEC_FMULX))]
"TARGET_SIMD" "TARGET_SIMD"
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
...@@ -4341,10 +4341,10 @@ ...@@ -4341,10 +4341,10 @@
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
(neg:<V_cmp_result> (neg:<V_cmp_result>
(FAC_COMPARISONS:<V_cmp_result> (FAC_COMPARISONS:<V_cmp_result>
(abs:VHSDF_SDF (abs:VHSDF_HSDF
(match_operand:VHSDF_SDF 1 "register_operand" "w")) (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
(abs:VHSDF_SDF (abs:VHSDF_HSDF
(match_operand:VHSDF_SDF 2 "register_operand" "w")) (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
)))] )))]
"TARGET_SIMD" "TARGET_SIMD"
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
...@@ -5460,10 +5460,10 @@ ...@@ -5460,10 +5460,10 @@
) )
(define_insn "aarch64_frecps<mode>" (define_insn "aarch64_frecps<mode>"
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_SDF (unspec:VHSDF_HSDF
[(match_operand:VHSDF_SDF 1 "register_operand" "w") [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_SDF 2 "register_operand" "w")] (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_FRECPS))] UNSPEC_FRECPS))]
"TARGET_SIMD" "TARGET_SIMD"
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
......
...@@ -4660,38 +4660,78 @@ ...@@ -4660,38 +4660,78 @@
(set_attr "simd" "*, yes")] (set_attr "simd" "*, yes")]
) )
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3"
[(set (match_operand:GPI 0 "register_operand" "=r")
(unspec:GPI [(match_operand:HF 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
FCVT_F2FIXED))]
"TARGET_FP_F16INST"
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPI:w>0, %h1, #%2"
[(set_attr "type" "f_cvtf2i")]
)
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3"
[(set (match_operand:HF 0 "register_operand" "=w")
(unspec:HF [(match_operand:GPI 1 "register_operand" "r")
(match_operand:SI 2 "immediate_operand" "i")]
FCVT_FIXED2F))]
"TARGET_FP_F16INST"
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %<GPI:w>1, #%2"
[(set_attr "type" "f_cvti2f")]
)
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf3"
[(set (match_operand:HI 0 "register_operand" "=w")
(unspec:HI [(match_operand:HF 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
FCVT_F2FIXED))]
"TARGET_SIMD"
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%h0, %h1, #%2"
[(set_attr "type" "neon_fp_to_int_s")]
)
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn>hi3"
[(set (match_operand:HF 0 "register_operand" "=w")
(unspec:HF [(match_operand:HI 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
FCVT_FIXED2F))]
"TARGET_SIMD"
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %h1, #%2"
[(set_attr "type" "neon_int_to_fp_s")]
)
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
;; Floating-point arithmetic ;; Floating-point arithmetic
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
(define_insn "add<mode>3" (define_insn "add<mode>3"
[(set (match_operand:GPF 0 "register_operand" "=w") [(set (match_operand:GPF_F16 0 "register_operand" "=w")
(plus:GPF (plus:GPF_F16
(match_operand:GPF 1 "register_operand" "w") (match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")))] (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT" "TARGET_FLOAT"
"fadd\\t%<s>0, %<s>1, %<s>2" "fadd\\t%<s>0, %<s>1, %<s>2"
[(set_attr "type" "fadd<s>")] [(set_attr "type" "fadd<stype>")]
) )
(define_insn "sub<mode>3" (define_insn "sub<mode>3"
[(set (match_operand:GPF 0 "register_operand" "=w") [(set (match_operand:GPF_F16 0 "register_operand" "=w")
(minus:GPF (minus:GPF_F16
(match_operand:GPF 1 "register_operand" "w") (match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")))] (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT" "TARGET_FLOAT"
"fsub\\t%<s>0, %<s>1, %<s>2" "fsub\\t%<s>0, %<s>1, %<s>2"
[(set_attr "type" "fadd<s>")] [(set_attr "type" "fadd<stype>")]
) )
(define_insn "mul<mode>3" (define_insn "mul<mode>3"
[(set (match_operand:GPF 0 "register_operand" "=w") [(set (match_operand:GPF_F16 0 "register_operand" "=w")
(mult:GPF (mult:GPF_F16
(match_operand:GPF 1 "register_operand" "w") (match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")))] (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT" "TARGET_FLOAT"
"fmul\\t%<s>0, %<s>1, %<s>2" "fmul\\t%<s>0, %<s>1, %<s>2"
[(set_attr "type" "fmul<s>")] [(set_attr "type" "fmul<stype>")]
) )
(define_insn "*fnmul<mode>3" (define_insn "*fnmul<mode>3"
...@@ -4715,9 +4755,9 @@ ...@@ -4715,9 +4755,9 @@
) )
(define_expand "div<mode>3" (define_expand "div<mode>3"
[(set (match_operand:GPF 0 "register_operand") [(set (match_operand:GPF_F16 0 "register_operand")
(div:GPF (match_operand:GPF 1 "general_operand") (div:GPF_F16 (match_operand:GPF_F16 1 "general_operand")
(match_operand:GPF 2 "register_operand")))] (match_operand:GPF_F16 2 "register_operand")))]
"TARGET_SIMD" "TARGET_SIMD"
{ {
if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
...@@ -4727,12 +4767,12 @@ ...@@ -4727,12 +4767,12 @@
}) })
(define_insn "*div<mode>3" (define_insn "*div<mode>3"
[(set (match_operand:GPF 0 "register_operand" "=w") [(set (match_operand:GPF_F16 0 "register_operand" "=w")
(div:GPF (match_operand:GPF 1 "register_operand" "w") (div:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")))] (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT" "TARGET_FLOAT"
"fdiv\\t%<s>0, %<s>1, %<s>2" "fdiv\\t%<s>0, %<s>1, %<s>2"
[(set_attr "type" "fdiv<s>")] [(set_attr "type" "fdiv<stype>")]
) )
(define_insn "neg<mode>2" (define_insn "neg<mode>2"
...@@ -4792,13 +4832,13 @@ ...@@ -4792,13 +4832,13 @@
;; Scalar forms for the IEEE-754 fmax()/fmin() functions ;; Scalar forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3" (define_insn "<fmaxmin><mode>3"
[(set (match_operand:GPF 0 "register_operand" "=w") [(set (match_operand:GPF_F16 0 "register_operand" "=w")
(unspec:GPF [(match_operand:GPF 1 "register_operand" "w") (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")] (match_operand:GPF_F16 2 "register_operand" "w")]
FMAXMIN))] FMAXMIN))]
"TARGET_FLOAT" "TARGET_FLOAT"
"<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2" "<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2"
[(set_attr "type" "f_minmax<s>")] [(set_attr "type" "f_minmax<stype>")]
) )
;; For copysign (x, y), we want to generate: ;; For copysign (x, y), we want to generate:
......
...@@ -360,6 +360,206 @@ vsqrth_f16 (float16_t __a) ...@@ -360,6 +360,206 @@ vsqrth_f16 (float16_t __a)
return __builtin_aarch64_sqrthf (__a); return __builtin_aarch64_sqrthf (__a);
} }
/* ARMv8.2-A FP16 two operands scalar intrinsics. */
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vaddh_f16 (float16_t __a, float16_t __b)
{
return __a + __b;
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vabdh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fabdhf (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcageh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgehf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcagth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgthf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcaleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclehf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcalth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclthf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vceqh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmeqhf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcgeh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgehf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcgth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgthf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlehf_uss (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vclth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlthf_uss (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vcvth_n_f16_s16 (int16_t __a, const int __b)
{
return __builtin_aarch64_scvtfhi (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vcvth_n_f16_s32 (int32_t __a, const int __b)
{
return __builtin_aarch64_scvtfsihf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vcvth_n_f16_s64 (int64_t __a, const int __b)
{
return __builtin_aarch64_scvtfdihf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vcvth_n_f16_u16 (uint16_t __a, const int __b)
{
return __builtin_aarch64_ucvtfhi_sus (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vcvth_n_f16_u32 (uint32_t __a, const int __b)
{
return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vcvth_n_f16_u64 (uint64_t __a, const int __b)
{
return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vcvth_n_s16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshf (__a, __b);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvth_n_s32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfsi (__a, __b);
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvth_n_s64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfdi (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vcvth_n_u16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvth_n_u32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
}
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvth_n_u64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vdivh_f16 (float16_t __a, float16_t __b)
{
return __a / __b;
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmulh_f16 (float16_t __a, float16_t __b)
{
return __a * __b;
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmulxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmulxhf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vrecpsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_frecpshf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vrsqrtsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_rsqrtshf (__a, __b);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vsubh_f16 (float16_t __a, float16_t __b)
{
return __a - __b;
}
#pragma GCC pop_options #pragma GCC pop_options
#endif #endif
...@@ -105,9 +105,6 @@ ...@@ -105,9 +105,6 @@
(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST") (define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST") (V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF DF]) V2SF V4SF V2DF DF])
(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF SF DF])
(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST") (define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST") (V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF V2SF V4SF V2DF
...@@ -190,7 +187,9 @@ ...@@ -190,7 +187,9 @@
;; Scalar and Vector modes for S and D, Vector modes for H. ;; Scalar and Vector modes for S and D, Vector modes for H.
(define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST") (define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST")
(V8HI "TARGET_SIMD_F16INST") (V8HI "TARGET_SIMD_F16INST")
V2SI V4SI V2DI SI DI]) V2SI V4SI V2DI
(HI "TARGET_SIMD_F16INST")
SI DI])
;; Vector modes for Q and H types. ;; Vector modes for Q and H types.
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) (define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
...@@ -705,12 +704,12 @@ ...@@ -705,12 +704,12 @@
(V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf") (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
(SF "si") (DF "di") (SI "sf") (DI "df") (SF "si") (DF "di") (SI "sf") (DI "df")
(V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf") (V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
(V8HI "v8hf")]) (V8HI "v8hf") (HF "hi") (HI "hf")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
(V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF") (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
(SF "SI") (DF "DI") (SI "SF") (DI "DF") (SF "SI") (DF "DI") (SI "SF") (DI "DF")
(V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF") (V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
(V8HI "V8HF")]) (V8HI "V8HF") (HF "HI") (HI "HF")])
;; for the inequal width integer to fp conversions ;; for the inequal width integer to fp conversions
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment