Commit daef0a8c by Jiong Wang Committed by Jiong Wang

[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics

gcc/
	* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
	* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
	* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
	(neg<mode>2): Likewise.
	(abs<mode>2): Likewise.
	(<frint_pattern><mode>2): Likewise.
	(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
	(<optab><VDQF:mode><fcvt_target>2): Likewise.
	(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
	(ftrunc<VDQF:mode>2): Likewise.
	(<optab><fcvt_target><VDQF:mode>2): Likewise.
	(sqrt<mode>2): Likewise.
	(*sqrt<mode>2): Likewise.
	(aarch64_frecpe<mode>): Likewise.
	(aarch64_cm<optab><mode>): Likewise.
	* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
	false for V4HF and V8HF.
	* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
	(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
	(stype): New.
	* config/aarch64/arm_neon.h (vdup_n_f16): New.
	(vdupq_n_f16): Likewise.
	(vld1_dup_f16): Use vdup_n_f16.
	(vld1q_dup_f16): Use vdupq_n_f16.
	(vabs_f16): New.
	(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
	vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
	vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
	vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
	vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
	vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
	vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
	vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
	vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
	vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
	vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
	vsqrtq_f16): Likewise.

From-SVN: r238716
parent 358decd5
2016-07-25 Jiong Wang <jiong.wang@arm.com> 2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd.md * config/aarch64/aarch64-simd.md
(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): Use VALL_F16. (aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): Use VALL_F16.
(aarch64_ext<mode>): Likewise. (aarch64_ext<mode>): Likewise.
......
...@@ -139,6 +139,10 @@ aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS] ...@@ -139,6 +139,10 @@ aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned }; = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers) #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_poly, qualifier_poly, qualifier_poly }; = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers) #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
BUILTIN_VDC (COMBINE, combine, 0) BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0) BUILTIN_VB (BINOP, pmul, 0)
BUILTIN_VALLF (BINOP, fmulx, 0) BUILTIN_VALLF (BINOP, fmulx, 0)
BUILTIN_VDQF_DF (UNOP, sqrt, 2) BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0) BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di) VAR1 (UNOP, addp, 0, di)
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2) BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
...@@ -266,23 +266,29 @@ ...@@ -266,23 +266,29 @@
BUILTIN_VDQF (BINOP, smin_nanp, 0) BUILTIN_VDQF (BINOP, smin_nanp, 0)
/* Implemented by <frint_pattern><mode>2. */ /* Implemented by <frint_pattern><mode>2. */
BUILTIN_VDQF (UNOP, btrunc, 2) BUILTIN_VHSDF (UNOP, btrunc, 2)
BUILTIN_VDQF (UNOP, ceil, 2) BUILTIN_VHSDF (UNOP, ceil, 2)
BUILTIN_VDQF (UNOP, floor, 2) BUILTIN_VHSDF (UNOP, floor, 2)
BUILTIN_VDQF (UNOP, nearbyint, 2) BUILTIN_VHSDF (UNOP, nearbyint, 2)
BUILTIN_VDQF (UNOP, rint, 2) BUILTIN_VHSDF (UNOP, rint, 2)
BUILTIN_VDQF (UNOP, round, 2) BUILTIN_VHSDF (UNOP, round, 2)
BUILTIN_VDQF_DF (UNOP, frintn, 2) BUILTIN_VHSDF_DF (UNOP, frintn, 2)
/* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */ /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
VAR1 (UNOP, lbtruncv2sf, 2, v2si) VAR1 (UNOP, lbtruncv2sf, 2, v2si)
VAR1 (UNOP, lbtruncv4sf, 2, v4si) VAR1 (UNOP, lbtruncv4sf, 2, v4si)
VAR1 (UNOP, lbtruncv2df, 2, v2di) VAR1 (UNOP, lbtruncv2df, 2, v2di)
VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi)
VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi)
VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si) VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si)
VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si) VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si)
VAR1 (UNOPUS, lbtruncuv2df, 2, v2di) VAR1 (UNOPUS, lbtruncuv2df, 2, v2di)
VAR1 (UNOP, lroundv4hf, 2, v4hi)
VAR1 (UNOP, lroundv8hf, 2, v8hi)
VAR1 (UNOP, lroundv2sf, 2, v2si) VAR1 (UNOP, lroundv2sf, 2, v2si)
VAR1 (UNOP, lroundv4sf, 2, v4si) VAR1 (UNOP, lroundv4sf, 2, v4si)
VAR1 (UNOP, lroundv2df, 2, v2di) VAR1 (UNOP, lroundv2df, 2, v2di)
...@@ -290,38 +296,52 @@ ...@@ -290,38 +296,52 @@
VAR1 (UNOP, lroundsf, 2, si) VAR1 (UNOP, lroundsf, 2, si)
VAR1 (UNOP, lrounddf, 2, di) VAR1 (UNOP, lrounddf, 2, di)
VAR1 (UNOPUS, lrounduv4hf, 2, v4hi)
VAR1 (UNOPUS, lrounduv8hf, 2, v8hi)
VAR1 (UNOPUS, lrounduv2sf, 2, v2si) VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
VAR1 (UNOPUS, lrounduv4sf, 2, v4si) VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
VAR1 (UNOPUS, lrounduv2df, 2, v2di) VAR1 (UNOPUS, lrounduv2df, 2, v2di)
VAR1 (UNOPUS, lroundusf, 2, si) VAR1 (UNOPUS, lroundusf, 2, si)
VAR1 (UNOPUS, lroundudf, 2, di) VAR1 (UNOPUS, lroundudf, 2, di)
VAR1 (UNOP, lceilv4hf, 2, v4hi)
VAR1 (UNOP, lceilv8hf, 2, v8hi)
VAR1 (UNOP, lceilv2sf, 2, v2si) VAR1 (UNOP, lceilv2sf, 2, v2si)
VAR1 (UNOP, lceilv4sf, 2, v4si) VAR1 (UNOP, lceilv4sf, 2, v4si)
VAR1 (UNOP, lceilv2df, 2, v2di) VAR1 (UNOP, lceilv2df, 2, v2di)
VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
VAR1 (UNOPUS, lceiluv2sf, 2, v2si) VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
VAR1 (UNOPUS, lceiluv4sf, 2, v4si) VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
VAR1 (UNOPUS, lceiluv2df, 2, v2di) VAR1 (UNOPUS, lceiluv2df, 2, v2di)
VAR1 (UNOPUS, lceilusf, 2, si) VAR1 (UNOPUS, lceilusf, 2, si)
VAR1 (UNOPUS, lceiludf, 2, di) VAR1 (UNOPUS, lceiludf, 2, di)
VAR1 (UNOP, lfloorv4hf, 2, v4hi)
VAR1 (UNOP, lfloorv8hf, 2, v8hi)
VAR1 (UNOP, lfloorv2sf, 2, v2si) VAR1 (UNOP, lfloorv2sf, 2, v2si)
VAR1 (UNOP, lfloorv4sf, 2, v4si) VAR1 (UNOP, lfloorv4sf, 2, v4si)
VAR1 (UNOP, lfloorv2df, 2, v2di) VAR1 (UNOP, lfloorv2df, 2, v2di)
VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
VAR1 (UNOPUS, lflooruv2sf, 2, v2si) VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
VAR1 (UNOPUS, lflooruv4sf, 2, v4si) VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
VAR1 (UNOPUS, lflooruv2df, 2, v2di) VAR1 (UNOPUS, lflooruv2df, 2, v2di)
VAR1 (UNOPUS, lfloorusf, 2, si) VAR1 (UNOPUS, lfloorusf, 2, si)
VAR1 (UNOPUS, lfloorudf, 2, di) VAR1 (UNOPUS, lfloorudf, 2, di)
VAR1 (UNOP, lfrintnv4hf, 2, v4hi)
VAR1 (UNOP, lfrintnv8hf, 2, v8hi)
VAR1 (UNOP, lfrintnv2sf, 2, v2si) VAR1 (UNOP, lfrintnv2sf, 2, v2si)
VAR1 (UNOP, lfrintnv4sf, 2, v4si) VAR1 (UNOP, lfrintnv4sf, 2, v4si)
VAR1 (UNOP, lfrintnv2df, 2, v2di) VAR1 (UNOP, lfrintnv2df, 2, v2di)
VAR1 (UNOP, lfrintnsf, 2, si) VAR1 (UNOP, lfrintnsf, 2, si)
VAR1 (UNOP, lfrintndf, 2, di) VAR1 (UNOP, lfrintndf, 2, di)
VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi)
VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi)
VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si) VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si) VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
VAR1 (UNOPUS, lfrintnuv2df, 2, v2di) VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
...@@ -329,10 +349,14 @@ ...@@ -329,10 +349,14 @@
VAR1 (UNOPUS, lfrintnudf, 2, di) VAR1 (UNOPUS, lfrintnudf, 2, di)
/* Implemented by <optab><fcvt_target><VDQF:mode>2. */ /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
VAR1 (UNOP, floatv4hi, 2, v4hf)
VAR1 (UNOP, floatv8hi, 2, v8hf)
VAR1 (UNOP, floatv2si, 2, v2sf) VAR1 (UNOP, floatv2si, 2, v2sf)
VAR1 (UNOP, floatv4si, 2, v4sf) VAR1 (UNOP, floatv4si, 2, v4sf)
VAR1 (UNOP, floatv2di, 2, v2df) VAR1 (UNOP, floatv2di, 2, v2df)
VAR1 (UNOP, floatunsv4hi, 2, v4hf)
VAR1 (UNOP, floatunsv8hi, 2, v8hf)
VAR1 (UNOP, floatunsv2si, 2, v2sf) VAR1 (UNOP, floatunsv2si, 2, v2sf)
VAR1 (UNOP, floatunsv4si, 2, v4sf) VAR1 (UNOP, floatunsv4si, 2, v4sf)
VAR1 (UNOP, floatunsv2di, 2, v2df) VAR1 (UNOP, floatunsv2di, 2, v2df)
...@@ -358,13 +382,13 @@ ...@@ -358,13 +382,13 @@
BUILTIN_VDQ_SI (UNOP, urecpe, 0) BUILTIN_VDQ_SI (UNOP, urecpe, 0)
BUILTIN_VDQF (UNOP, frecpe, 0) BUILTIN_VHSDF (UNOP, frecpe, 0)
BUILTIN_VDQF (BINOP, frecps, 0) BUILTIN_VDQF (BINOP, frecps, 0)
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is /* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
only ever used for the int64x1_t intrinsic, there is no scalar version. */ only ever used for the int64x1_t intrinsic, there is no scalar version. */
BUILTIN_VSDQ_I_DI (UNOP, abs, 0) BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
BUILTIN_VDQF (UNOP, abs, 2) BUILTIN_VHSDF (UNOP, abs, 2)
BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10) BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf) VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
...@@ -457,7 +481,7 @@ ...@@ -457,7 +481,7 @@
BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3) BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
/* Implemented by aarch64_rsqrte<mode>. */ /* Implemented by aarch64_rsqrte<mode>. */
BUILTIN_VALLF (UNOP, rsqrte, 0) BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */ /* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VALLF (BINOP, rsqrts, 0) BUILTIN_VALLF (BINOP, rsqrts, 0)
...@@ -467,3 +491,13 @@ ...@@ -467,3 +491,13 @@
/* Implemented by aarch64_faddp<mode>. */ /* Implemented by aarch64_faddp<mode>. */
BUILTIN_VDQF (BINOP, faddp, 0) BUILTIN_VDQF (BINOP, faddp, 0)
/* Implemented by aarch64_cm<optab><mode>. */
BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
/* Implemented by neg<mode>2. */
BUILTIN_VHSDF (UNOP, neg, 2)
...@@ -383,12 +383,12 @@ ...@@ -383,12 +383,12 @@
) )
(define_insn "aarch64_rsqrte<mode>" (define_insn "aarch64_rsqrte<mode>"
[(set (match_operand:VALLF 0 "register_operand" "=w") [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))] UNSPEC_RSQRTE))]
"TARGET_SIMD" "TARGET_SIMD"
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")]) [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "aarch64_rsqrts<mode>" (define_insn "aarch64_rsqrts<mode>"
[(set (match_operand:VALLF 0 "register_operand" "=w") [(set (match_operand:VALLF 0 "register_operand" "=w")
...@@ -1565,19 +1565,19 @@ ...@@ -1565,19 +1565,19 @@
) )
(define_insn "neg<mode>2" (define_insn "neg<mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))] (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fneg\\t%0.<Vtype>, %1.<Vtype>" "fneg\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_neg_<Vetype><q>")] [(set_attr "type" "neon_fp_neg_<stype><q>")]
) )
(define_insn "abs<mode>2" (define_insn "abs<mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))] (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fabs\\t%0.<Vtype>, %1.<Vtype>" "fabs\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_abs_<Vetype><q>")] [(set_attr "type" "neon_fp_abs_<stype><q>")]
) )
(define_insn "fma<mode>4" (define_insn "fma<mode>4"
...@@ -1735,24 +1735,24 @@ ...@@ -1735,24 +1735,24 @@
;; Vector versions of the floating-point frint patterns. ;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2" (define_insn "<frint_pattern><mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FRINT))] FRINT))]
"TARGET_SIMD" "TARGET_SIMD"
"frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>" "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_round_<Vetype><q>")] [(set_attr "type" "neon_fp_round_<stype><q>")]
) )
;; Vector versions of the fcvt standard patterns. ;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor ;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2" (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VDQF 1 "register_operand" "w")] [(match_operand:VHSDF 1 "register_operand" "w")]
FCVT)))] FCVT)))]
"TARGET_SIMD" "TARGET_SIMD"
"fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>" "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")] [(set_attr "type" "neon_fp_to_int_<stype><q>")]
) )
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult" (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
...@@ -1775,36 +1775,36 @@ ...@@ -1775,36 +1775,36 @@
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")] [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
) )
(define_expand "<optab><VDQF:mode><fcvt_target>2" (define_expand "<optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand") [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VDQF 1 "register_operand")] [(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ)))] UNSPEC_FRINTZ)))]
"TARGET_SIMD" "TARGET_SIMD"
{}) {})
(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2" (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand") [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VDQF 1 "register_operand")] [(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ)))] UNSPEC_FRINTZ)))]
"TARGET_SIMD" "TARGET_SIMD"
{}) {})
(define_expand "ftrunc<VDQF:mode>2" (define_expand "ftrunc<VHSDF:mode>2"
[(set (match_operand:VDQF 0 "register_operand") [(set (match_operand:VHSDF 0 "register_operand")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ))] UNSPEC_FRINTZ))]
"TARGET_SIMD" "TARGET_SIMD"
{}) {})
(define_insn "<optab><fcvt_target><VDQF:mode>2" (define_insn "<optab><fcvt_target><VHSDF:mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(FLOATUORS:VDQF (FLOATUORS:VHSDF
(match_operand:<FCVT_TARGET> 1 "register_operand" "w")))] (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>" "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_int_to_fp_<Vetype><q>")] [(set_attr "type" "neon_int_to_fp_<stype><q>")]
) )
;; Conversions between vectors of floats and doubles. ;; Conversions between vectors of floats and doubles.
...@@ -4296,14 +4296,14 @@ ...@@ -4296,14 +4296,14 @@
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
(neg:<V_cmp_result> (neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result> (COMPARISONS:<V_cmp_result>
(match_operand:VALLF 1 "register_operand" "w,w") (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
(match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz") (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
)))] )))]
"TARGET_SIMD" "TARGET_SIMD"
"@ "@
fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
[(set_attr "type" "neon_fp_compare_<Vetype><q>")] [(set_attr "type" "neon_fp_compare_<stype><q>")]
) )
;; fac(ge|gt) ;; fac(ge|gt)
...@@ -4348,8 +4348,8 @@ ...@@ -4348,8 +4348,8 @@
;; sqrt ;; sqrt
(define_expand "sqrt<mode>2" (define_expand "sqrt<mode>2"
[(set (match_operand:VDQF 0 "register_operand") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(sqrt:VDQF (match_operand:VDQF 1 "register_operand")))] (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
{ {
if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
...@@ -4357,11 +4357,11 @@ ...@@ -4357,11 +4357,11 @@
}) })
(define_insn "*sqrt<mode>2" (define_insn "*sqrt<mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fsqrt\\t%0.<Vtype>, %1.<Vtype>" "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_sqrt_<Vetype><q>")] [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
) )
;; Patterns for vector struct loads and stores. ;; Patterns for vector struct loads and stores.
...@@ -5413,12 +5413,12 @@ ...@@ -5413,12 +5413,12 @@
) )
(define_insn "aarch64_frecpe<mode>" (define_insn "aarch64_frecpe<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
UNSPEC_FRECPE))] UNSPEC_FRECPE))]
"TARGET_SIMD" "TARGET_SIMD"
"frecpe\\t%0.<Vtype>, %1.<Vtype>" "frecpe\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_recpe_<Vetype><q>")] [(set_attr "type" "neon_fp_recpe_<stype><q>")]
) )
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
......
...@@ -7485,6 +7485,10 @@ bool ...@@ -7485,6 +7485,10 @@ bool
aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
{ {
machine_mode mode = GET_MODE (dst); machine_mode mode = GET_MODE (dst);
if (GET_MODE_INNER (mode) == HFmode)
return false;
machine_mode mmsk = mode_for_vector machine_mode mmsk = mode_for_vector
(int_mode_for_mode (GET_MODE_INNER (mode)), (int_mode_for_mode (GET_MODE_INNER (mode)),
GET_MODE_NUNITS (mode)); GET_MODE_NUNITS (mode));
......
...@@ -88,11 +88,20 @@ ...@@ -88,11 +88,20 @@
;; Vector Float modes suitable for moving, loading and storing. ;; Vector Float modes suitable for moving, loading and storing.
(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF]) (define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF])
;; Vector Float modes, barring HF modes. ;; Vector Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF]) (define_mode_iterator VDQF [V2SF V4SF V2DF])
(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF])
;; Vector Float modes, and DF. ;; Vector Float modes, and DF.
(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF]) (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF DF])
(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF SF DF])
;; Vector single Float modes. ;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF]) (define_mode_iterator VDQSF [V2SF V4SF])
...@@ -366,7 +375,8 @@ ...@@ -366,7 +375,8 @@
(V4HI "") (V8HI "") (V4HI "") (V8HI "")
(V2SI "") (V4SI "") (V2SI "") (V4SI "")
(V2DI "") (V2SF "") (V2DI "") (V2SF "")
(V4SF "") (V2DF "")]) (V4SF "") (V4HF "")
(V8HF "") (V2DF "")])
;; For scalar usage of vector/FP registers, narrowing ;; For scalar usage of vector/FP registers, narrowing
(define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s") (define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s")
...@@ -447,6 +457,16 @@ ...@@ -447,6 +457,16 @@
(QI "b") (HI "h") (QI "b") (HI "h")
(SI "s") (DI "d")]) (SI "s") (DI "d")])
;; Vetype is used everywhere in scheduling type and assembly output,
;; sometimes they are not the same, for example HF modes on some
;; instructions. stype is defined to represent scheduling type
;; more accurately.
(define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
(V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
(V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
(HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
(SI "s") (DI "d")])
;; Mode-to-bitwise operation type mapping. ;; Mode-to-bitwise operation type mapping.
(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b") (define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b")
(V4HI "8b") (V8HI "16b") (V4HI "8b") (V8HI "16b")
...@@ -656,10 +676,14 @@ ...@@ -656,10 +676,14 @@
(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")
(V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf") (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
(SF "si") (DF "di") (SI "sf") (DI "df")]) (SF "si") (DF "di") (SI "sf") (DI "df")
(V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
(V8HI "v8hf")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
(V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF") (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
(SF "SI") (DF "DI") (SI "SF") (DI "DF")]) (SF "SI") (DF "DI") (SI "SF") (DI "DF")
(V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
(V8HI "V8HF")])
;; for the inequal width integer to fp conversions ;; for the inequal width integer to fp conversions
...@@ -687,6 +711,7 @@ ...@@ -687,6 +711,7 @@
;; the 'x' constraint. All other modes may use the 'w' constraint. ;; the 'x' constraint. All other modes may use the 'w' constraint.
(define_mode_attr h_con [(V2SI "w") (V4SI "w") (define_mode_attr h_con [(V2SI "w") (V4SI "w")
(V4HI "x") (V8HI "x") (V4HI "x") (V8HI "x")
(V4HF "w") (V8HF "w")
(V2SF "w") (V4SF "w") (V2SF "w") (V4SF "w")
(V2DF "w") (DF "w")]) (V2DF "w") (DF "w")])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment