Commit daef0a8c by Jiong Wang Committed by Jiong Wang

[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics

gcc/
	* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
	* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
	* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
	(neg<mode>2): Likewise.
	(abs<mode>2): Likewise.
	(<frint_pattern><mode>2): Likewise.
	(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
	(<optab><VDQF:mode><fcvt_target>2): Likewise.
	(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
	(ftrunc<VDQF:mode>2): Likewise.
	(<optab><fcvt_target><VDQF:mode>2): Likewise.
	(sqrt<mode>2): Likewise.
	(*sqrt<mode>2): Likewise.
	(aarch64_frecpe<mode>): Likewise.
	(aarch64_cm<optab><mode>): Likewise.
	* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
	false for V4HF and V8HF.
	* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
	(VDQF_COND, fcvt_target, FCVT_TARGET, h_con): Extend mode attribute to HF modes.
	(stype): New.
	* config/aarch64/arm_neon.h (vdup_n_f16): New.
	(vdupq_n_f16): Likewise.
	(vld1_dup_f16): Use vdup_n_f16.
	(vld1q_dup_f16): Use vdupq_n_f16.
	(vabs_f16): New.
	(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
	vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
	vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
	vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
	vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
	vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
	vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
	vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
	vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
	vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
	vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
	vsqrtq_f16): Likewise.

From-SVN: r238716
parent 358decd5
2016-07-25  Jiong Wang  <jiong.wang@arm.com>
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
(neg<mode>2): Likewise.
(abs<mode>2): Likewise.
(<frint_pattern><mode>2): Likewise.
(l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
(<optab><VDQF:mode><fcvt_target>2): Likewise.
(<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
(ftrunc<VDQF:mode>2): Likewise.
(<optab><fcvt_target><VDQF:mode>2): Likewise.
(sqrt<mode>2): Likewise.
(*sqrt<mode>2): Likewise.
(aarch64_frecpe<mode>): Likewise.
(aarch64_cm<optab><mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
false for V4HF and V8HF.
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
(VDQF_COND, fcvt_target, FCVT_TARGET, h_con): Extend mode attribute to HF modes.
(stype): New.
* config/aarch64/arm_neon.h (vdup_n_f16): New.
(vdupq_n_f16): Likewise.
(vld1_dup_f16): Use vdup_n_f16.
(vld1q_dup_f16): Use vdupq_n_f16.
(vabs_f16): New.
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
vsqrtq_f16): Likewise.
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): Use VALL_F16.
(aarch64_ext<mode>): Likewise.
......
...@@ -139,6 +139,10 @@ aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS] ...@@ -139,6 +139,10 @@ aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned }; = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers) #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_poly, qualifier_poly, qualifier_poly }; = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers) #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
BUILTIN_VDC (COMBINE, combine, 0) BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0) BUILTIN_VB (BINOP, pmul, 0)
BUILTIN_VALLF (BINOP, fmulx, 0) BUILTIN_VALLF (BINOP, fmulx, 0)
BUILTIN_VDQF_DF (UNOP, sqrt, 2) BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0) BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di) VAR1 (UNOP, addp, 0, di)
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2) BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
...@@ -266,23 +266,29 @@ ...@@ -266,23 +266,29 @@
BUILTIN_VDQF (BINOP, smin_nanp, 0) BUILTIN_VDQF (BINOP, smin_nanp, 0)
/* Implemented by <frint_pattern><mode>2. */ /* Implemented by <frint_pattern><mode>2. */
BUILTIN_VDQF (UNOP, btrunc, 2) BUILTIN_VHSDF (UNOP, btrunc, 2)
BUILTIN_VDQF (UNOP, ceil, 2) BUILTIN_VHSDF (UNOP, ceil, 2)
BUILTIN_VDQF (UNOP, floor, 2) BUILTIN_VHSDF (UNOP, floor, 2)
BUILTIN_VDQF (UNOP, nearbyint, 2) BUILTIN_VHSDF (UNOP, nearbyint, 2)
BUILTIN_VDQF (UNOP, rint, 2) BUILTIN_VHSDF (UNOP, rint, 2)
BUILTIN_VDQF (UNOP, round, 2) BUILTIN_VHSDF (UNOP, round, 2)
BUILTIN_VDQF_DF (UNOP, frintn, 2) BUILTIN_VHSDF_DF (UNOP, frintn, 2)
/* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */ /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
VAR1 (UNOP, lbtruncv2sf, 2, v2si) VAR1 (UNOP, lbtruncv2sf, 2, v2si)
VAR1 (UNOP, lbtruncv4sf, 2, v4si) VAR1 (UNOP, lbtruncv4sf, 2, v4si)
VAR1 (UNOP, lbtruncv2df, 2, v2di) VAR1 (UNOP, lbtruncv2df, 2, v2di)
VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi)
VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi)
VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si) VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si)
VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si) VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si)
VAR1 (UNOPUS, lbtruncuv2df, 2, v2di) VAR1 (UNOPUS, lbtruncuv2df, 2, v2di)
VAR1 (UNOP, lroundv4hf, 2, v4hi)
VAR1 (UNOP, lroundv8hf, 2, v8hi)
VAR1 (UNOP, lroundv2sf, 2, v2si) VAR1 (UNOP, lroundv2sf, 2, v2si)
VAR1 (UNOP, lroundv4sf, 2, v4si) VAR1 (UNOP, lroundv4sf, 2, v4si)
VAR1 (UNOP, lroundv2df, 2, v2di) VAR1 (UNOP, lroundv2df, 2, v2di)
...@@ -290,38 +296,52 @@ ...@@ -290,38 +296,52 @@
VAR1 (UNOP, lroundsf, 2, si) VAR1 (UNOP, lroundsf, 2, si)
VAR1 (UNOP, lrounddf, 2, di) VAR1 (UNOP, lrounddf, 2, di)
VAR1 (UNOPUS, lrounduv4hf, 2, v4hi)
VAR1 (UNOPUS, lrounduv8hf, 2, v8hi)
VAR1 (UNOPUS, lrounduv2sf, 2, v2si) VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
VAR1 (UNOPUS, lrounduv4sf, 2, v4si) VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
VAR1 (UNOPUS, lrounduv2df, 2, v2di) VAR1 (UNOPUS, lrounduv2df, 2, v2di)
VAR1 (UNOPUS, lroundusf, 2, si) VAR1 (UNOPUS, lroundusf, 2, si)
VAR1 (UNOPUS, lroundudf, 2, di) VAR1 (UNOPUS, lroundudf, 2, di)
VAR1 (UNOP, lceilv4hf, 2, v4hi)
VAR1 (UNOP, lceilv8hf, 2, v8hi)
VAR1 (UNOP, lceilv2sf, 2, v2si) VAR1 (UNOP, lceilv2sf, 2, v2si)
VAR1 (UNOP, lceilv4sf, 2, v4si) VAR1 (UNOP, lceilv4sf, 2, v4si)
VAR1 (UNOP, lceilv2df, 2, v2di) VAR1 (UNOP, lceilv2df, 2, v2di)
VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
VAR1 (UNOPUS, lceiluv2sf, 2, v2si) VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
VAR1 (UNOPUS, lceiluv4sf, 2, v4si) VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
VAR1 (UNOPUS, lceiluv2df, 2, v2di) VAR1 (UNOPUS, lceiluv2df, 2, v2di)
VAR1 (UNOPUS, lceilusf, 2, si) VAR1 (UNOPUS, lceilusf, 2, si)
VAR1 (UNOPUS, lceiludf, 2, di) VAR1 (UNOPUS, lceiludf, 2, di)
VAR1 (UNOP, lfloorv4hf, 2, v4hi)
VAR1 (UNOP, lfloorv8hf, 2, v8hi)
VAR1 (UNOP, lfloorv2sf, 2, v2si) VAR1 (UNOP, lfloorv2sf, 2, v2si)
VAR1 (UNOP, lfloorv4sf, 2, v4si) VAR1 (UNOP, lfloorv4sf, 2, v4si)
VAR1 (UNOP, lfloorv2df, 2, v2di) VAR1 (UNOP, lfloorv2df, 2, v2di)
VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
VAR1 (UNOPUS, lflooruv2sf, 2, v2si) VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
VAR1 (UNOPUS, lflooruv4sf, 2, v4si) VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
VAR1 (UNOPUS, lflooruv2df, 2, v2di) VAR1 (UNOPUS, lflooruv2df, 2, v2di)
VAR1 (UNOPUS, lfloorusf, 2, si) VAR1 (UNOPUS, lfloorusf, 2, si)
VAR1 (UNOPUS, lfloorudf, 2, di) VAR1 (UNOPUS, lfloorudf, 2, di)
VAR1 (UNOP, lfrintnv4hf, 2, v4hi)
VAR1 (UNOP, lfrintnv8hf, 2, v8hi)
VAR1 (UNOP, lfrintnv2sf, 2, v2si) VAR1 (UNOP, lfrintnv2sf, 2, v2si)
VAR1 (UNOP, lfrintnv4sf, 2, v4si) VAR1 (UNOP, lfrintnv4sf, 2, v4si)
VAR1 (UNOP, lfrintnv2df, 2, v2di) VAR1 (UNOP, lfrintnv2df, 2, v2di)
VAR1 (UNOP, lfrintnsf, 2, si) VAR1 (UNOP, lfrintnsf, 2, si)
VAR1 (UNOP, lfrintndf, 2, di) VAR1 (UNOP, lfrintndf, 2, di)
VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi)
VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi)
VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si) VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si) VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
VAR1 (UNOPUS, lfrintnuv2df, 2, v2di) VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
...@@ -329,10 +349,14 @@ ...@@ -329,10 +349,14 @@
VAR1 (UNOPUS, lfrintnudf, 2, di) VAR1 (UNOPUS, lfrintnudf, 2, di)
/* Implemented by <optab><fcvt_target><VDQF:mode>2. */ /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
VAR1 (UNOP, floatv4hi, 2, v4hf)
VAR1 (UNOP, floatv8hi, 2, v8hf)
VAR1 (UNOP, floatv2si, 2, v2sf) VAR1 (UNOP, floatv2si, 2, v2sf)
VAR1 (UNOP, floatv4si, 2, v4sf) VAR1 (UNOP, floatv4si, 2, v4sf)
VAR1 (UNOP, floatv2di, 2, v2df) VAR1 (UNOP, floatv2di, 2, v2df)
VAR1 (UNOP, floatunsv4hi, 2, v4hf)
VAR1 (UNOP, floatunsv8hi, 2, v8hf)
VAR1 (UNOP, floatunsv2si, 2, v2sf) VAR1 (UNOP, floatunsv2si, 2, v2sf)
VAR1 (UNOP, floatunsv4si, 2, v4sf) VAR1 (UNOP, floatunsv4si, 2, v4sf)
VAR1 (UNOP, floatunsv2di, 2, v2df) VAR1 (UNOP, floatunsv2di, 2, v2df)
...@@ -358,13 +382,13 @@ ...@@ -358,13 +382,13 @@
BUILTIN_VDQ_SI (UNOP, urecpe, 0) BUILTIN_VDQ_SI (UNOP, urecpe, 0)
BUILTIN_VDQF (UNOP, frecpe, 0) BUILTIN_VHSDF (UNOP, frecpe, 0)
BUILTIN_VDQF (BINOP, frecps, 0) BUILTIN_VDQF (BINOP, frecps, 0)
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is /* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
only ever used for the int64x1_t intrinsic, there is no scalar version. */ only ever used for the int64x1_t intrinsic, there is no scalar version. */
BUILTIN_VSDQ_I_DI (UNOP, abs, 0) BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
BUILTIN_VDQF (UNOP, abs, 2) BUILTIN_VHSDF (UNOP, abs, 2)
BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10) BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf) VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
...@@ -457,7 +481,7 @@ ...@@ -457,7 +481,7 @@
BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3) BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
/* Implemented by aarch64_rsqrte<mode>. */ /* Implemented by aarch64_rsqrte<mode>. */
BUILTIN_VALLF (UNOP, rsqrte, 0) BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */ /* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VALLF (BINOP, rsqrts, 0) BUILTIN_VALLF (BINOP, rsqrts, 0)
...@@ -467,3 +491,13 @@ ...@@ -467,3 +491,13 @@
/* Implemented by aarch64_faddp<mode>. */ /* Implemented by aarch64_faddp<mode>. */
BUILTIN_VDQF (BINOP, faddp, 0) BUILTIN_VDQF (BINOP, faddp, 0)
/* Implemented by aarch64_cm<optab><mode>. */
BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
/* Implemented by neg<mode>2. */
BUILTIN_VHSDF (UNOP, neg, 2)
...@@ -383,12 +383,12 @@ ...@@ -383,12 +383,12 @@
) )
(define_insn "aarch64_rsqrte<mode>" (define_insn "aarch64_rsqrte<mode>"
[(set (match_operand:VALLF 0 "register_operand" "=w") [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))] UNSPEC_RSQRTE))]
"TARGET_SIMD" "TARGET_SIMD"
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")]) [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "aarch64_rsqrts<mode>" (define_insn "aarch64_rsqrts<mode>"
[(set (match_operand:VALLF 0 "register_operand" "=w") [(set (match_operand:VALLF 0 "register_operand" "=w")
...@@ -1565,19 +1565,19 @@ ...@@ -1565,19 +1565,19 @@
) )
(define_insn "neg<mode>2" (define_insn "neg<mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))] (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fneg\\t%0.<Vtype>, %1.<Vtype>" "fneg\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_neg_<Vetype><q>")] [(set_attr "type" "neon_fp_neg_<stype><q>")]
) )
(define_insn "abs<mode>2" (define_insn "abs<mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))] (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fabs\\t%0.<Vtype>, %1.<Vtype>" "fabs\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_abs_<Vetype><q>")] [(set_attr "type" "neon_fp_abs_<stype><q>")]
) )
(define_insn "fma<mode>4" (define_insn "fma<mode>4"
...@@ -1735,24 +1735,24 @@ ...@@ -1735,24 +1735,24 @@
;; Vector versions of the floating-point frint patterns. ;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2" (define_insn "<frint_pattern><mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FRINT))] FRINT))]
"TARGET_SIMD" "TARGET_SIMD"
"frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>" "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_round_<Vetype><q>")] [(set_attr "type" "neon_fp_round_<stype><q>")]
) )
;; Vector versions of the fcvt standard patterns. ;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor ;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2" (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VDQF 1 "register_operand" "w")] [(match_operand:VHSDF 1 "register_operand" "w")]
FCVT)))] FCVT)))]
"TARGET_SIMD" "TARGET_SIMD"
"fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>" "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")] [(set_attr "type" "neon_fp_to_int_<stype><q>")]
) )
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult" (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
...@@ -1775,36 +1775,36 @@ ...@@ -1775,36 +1775,36 @@
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")] [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
) )
(define_expand "<optab><VDQF:mode><fcvt_target>2" (define_expand "<optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand") [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VDQF 1 "register_operand")] [(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ)))] UNSPEC_FRINTZ)))]
"TARGET_SIMD" "TARGET_SIMD"
{}) {})
(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2" (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand") [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VDQF 1 "register_operand")] [(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ)))] UNSPEC_FRINTZ)))]
"TARGET_SIMD" "TARGET_SIMD"
{}) {})
(define_expand "ftrunc<VDQF:mode>2" (define_expand "ftrunc<VHSDF:mode>2"
[(set (match_operand:VDQF 0 "register_operand") [(set (match_operand:VHSDF 0 "register_operand")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ))] UNSPEC_FRINTZ))]
"TARGET_SIMD" "TARGET_SIMD"
{}) {})
(define_insn "<optab><fcvt_target><VDQF:mode>2" (define_insn "<optab><fcvt_target><VHSDF:mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(FLOATUORS:VDQF (FLOATUORS:VHSDF
(match_operand:<FCVT_TARGET> 1 "register_operand" "w")))] (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>" "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_int_to_fp_<Vetype><q>")] [(set_attr "type" "neon_int_to_fp_<stype><q>")]
) )
;; Conversions between vectors of floats and doubles. ;; Conversions between vectors of floats and doubles.
...@@ -4296,14 +4296,14 @@ ...@@ -4296,14 +4296,14 @@
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
(neg:<V_cmp_result> (neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result> (COMPARISONS:<V_cmp_result>
(match_operand:VALLF 1 "register_operand" "w,w") (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
(match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz") (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
)))] )))]
"TARGET_SIMD" "TARGET_SIMD"
"@ "@
fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
[(set_attr "type" "neon_fp_compare_<Vetype><q>")] [(set_attr "type" "neon_fp_compare_<stype><q>")]
) )
;; fac(ge|gt) ;; fac(ge|gt)
...@@ -4348,8 +4348,8 @@ ...@@ -4348,8 +4348,8 @@
;; sqrt ;; sqrt
(define_expand "sqrt<mode>2" (define_expand "sqrt<mode>2"
[(set (match_operand:VDQF 0 "register_operand") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(sqrt:VDQF (match_operand:VDQF 1 "register_operand")))] (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
{ {
if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
...@@ -4357,11 +4357,11 @@ ...@@ -4357,11 +4357,11 @@
}) })
(define_insn "*sqrt<mode>2" (define_insn "*sqrt<mode>2"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fsqrt\\t%0.<Vtype>, %1.<Vtype>" "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_sqrt_<Vetype><q>")] [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
) )
;; Patterns for vector struct loads and stores. ;; Patterns for vector struct loads and stores.
...@@ -5413,12 +5413,12 @@ ...@@ -5413,12 +5413,12 @@
) )
(define_insn "aarch64_frecpe<mode>" (define_insn "aarch64_frecpe<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
UNSPEC_FRECPE))] UNSPEC_FRECPE))]
"TARGET_SIMD" "TARGET_SIMD"
"frecpe\\t%0.<Vtype>, %1.<Vtype>" "frecpe\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_recpe_<Vetype><q>")] [(set_attr "type" "neon_fp_recpe_<stype><q>")]
) )
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
......
...@@ -7485,6 +7485,10 @@ bool ...@@ -7485,6 +7485,10 @@ bool
aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
{ {
machine_mode mode = GET_MODE (dst); machine_mode mode = GET_MODE (dst);
if (GET_MODE_INNER (mode) == HFmode)
return false;
machine_mode mmsk = mode_for_vector machine_mode mmsk = mode_for_vector
(int_mode_for_mode (GET_MODE_INNER (mode)), (int_mode_for_mode (GET_MODE_INNER (mode)),
GET_MODE_NUNITS (mode)); GET_MODE_NUNITS (mode));
......
...@@ -26028,6 +26028,365 @@ __INTERLEAVE_LIST (zip) ...@@ -26028,6 +26028,365 @@ __INTERLEAVE_LIST (zip)
/* End of optimal implementations in approved order. */ /* End of optimal implementations in approved order. */
#pragma GCC pop_options
/* ARMv8.2-A FP16 intrinsics. */
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+fp16")
/* ARMv8.2-A FP16 one operand vector intrinsics. */
/* vabs_f16 / vabsq_f16: lane-wise absolute value of a float16x4_t /
   float16x8_t.  Implemented by the abs<mode>2 pattern, which emits the
   FABS instruction (see the "fabs\t%0.<Vtype>, %1.<Vtype>" template in
   aarch64-simd.md).  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vabs_f16 (float16x4_t __a)
{
return __builtin_aarch64_absv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vabsq_f16 (float16x8_t __a)
{
return __builtin_aarch64_absv8hf (__a);
}
/* FP16 compare-against-zero intrinsics.  Each compares every lane of
   __a with +0.0 and yields an unsigned lane mask (uint16x4_t /
   uint16x8_t).  Implemented by the aarch64_cm<optab><mode> patterns
   (the "fcm<optab>\t..., 0" alternative); the _uss suffix on the
   builtins reflects the unsigned-result / signed-operand qualifier
   registered as TYPES_BINOP_USS.  The zero operand is materialised
   with vdup_n_f16/vdupq_n_f16.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_f16 (float16x4_t __a)
{
return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f));
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqzq_f16 (float16x8_t __a)
{
return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f));
}
/* Lanes >= 0.0.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgez_f16 (float16x4_t __a)
{
return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f));
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgezq_f16 (float16x8_t __a)
{
return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f));
}
/* Lanes > 0.0.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgtz_f16 (float16x4_t __a)
{
return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f));
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_f16 (float16x8_t __a)
{
return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f));
}
/* Lanes <= 0.0.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclez_f16 (float16x4_t __a)
{
return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f));
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclezq_f16 (float16x8_t __a)
{
return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f));
}
/* Lanes < 0.0.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcltz_f16 (float16x4_t __a)
{
return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f));
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltzq_f16 (float16x8_t __a)
{
return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f));
}
/* Conversions between 16-bit integers and FP16 vectors.

   int/uint -> f16 goes through the float/floatuns builtins
   (<optab><fcvt_target><VHSDF:mode>2 pattern, "<su_optab>cvtf"
   template).  The unsigned variants cast through int16x[48]_t because
   the builtin is registered with a signed vector argument type.

   f16 -> int/uint goes through the lbtrunc/lbtruncu builtins
   (truncating, i.e. round-toward-zero, per the "btrunc" fcvt pattern
   name); the _us suffix marks the unsigned-result qualifier.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_s16 (int16x4_t __a)
{
return __builtin_aarch64_floatv4hiv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vcvtq_f16_s16 (int16x8_t __a)
{
return __builtin_aarch64_floatv8hiv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_u16 (uint16x4_t __a)
{
return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vcvtq_f16_u16 (uint16x8_t __a)
{
return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcvt_s16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lbtruncv4hfv4hi (__a);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcvtq_s16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lbtruncv8hfv8hi (__a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcvt_u16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcvtq_u16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a);
}
/* FP16 -> 16-bit integer conversions with an explicit rounding mode,
   mapped onto the l<fcvt_pattern> builtins registered above:
     vcvta* -> lround  (FCVTA, round to nearest, ties away from zero)
     vcvtm* -> lfloor  (FCVTM, round toward minus infinity)
     vcvtn* -> lfrintn (FCVTN, round to nearest, ties to even)
     vcvtp* -> lceil   (FCVTP, round toward plus infinity)
   Unsigned-result variants use the ...u builtins with the _us suffix.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcvta_s16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lroundv4hfv4hi (__a);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcvtaq_s16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lroundv8hfv8hi (__a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcvta_u16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lrounduv4hfv4hi_us (__a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcvtaq_u16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lrounduv8hfv8hi_us (__a);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcvtm_s16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lfloorv4hfv4hi (__a);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcvtmq_s16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lfloorv8hfv8hi (__a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcvtm_u16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lflooruv4hfv4hi_us (__a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcvtmq_u16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lflooruv8hfv8hi_us (__a);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcvtn_s16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lfrintnv4hfv4hi (__a);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcvtnq_s16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lfrintnv8hfv8hi (__a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcvtn_u16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcvtnq_u16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcvtp_s16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lceilv4hfv4hi (__a);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcvtpq_s16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lceilv8hfv8hi (__a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcvtp_u16_f16 (float16x4_t __a)
{
return __builtin_aarch64_lceiluv4hfv4hi_us (__a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcvtpq_u16_f16 (float16x8_t __a)
{
return __builtin_aarch64_lceiluv8hfv8hi_us (__a);
}
/* vneg_f16 / vnegq_f16: lane-wise negation.  Uses GCC's generic vector
   negation rather than a builtin; the neg<mode>2 pattern (FNEG) is
   registered so the vectorizer/expander picks it up for these modes.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vneg_f16 (float16x4_t __a)
{
return -__a;
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vnegq_f16 (float16x8_t __a)
{
return -__a;
}
/* vrecpe_f16 / vrecpeq_f16: lane-wise reciprocal estimate, implemented
   by the aarch64_frecpe<mode> pattern (FRECPE instruction).  This is an
   estimate only; refine with the reciprocal step intrinsics if full
   precision is required.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrecpe_f16 (float16x4_t __a)
{
return __builtin_aarch64_frecpev4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrecpeq_f16 (float16x8_t __a)
{
return __builtin_aarch64_frecpev8hf (__a);
}
/* FP16 lane-wise round-to-integral intrinsics, all implemented by the
   <frint_pattern><mode>2 patterns (FRINT* instructions).  Builtin
   mapping:
     vrnd*  -> btrunc    (toward zero)
     vrnda* -> round     (to nearest, ties away from zero)
     vrndi* -> nearbyint (current rounding mode, no inexact trap)
     vrndm* -> floor     (toward minus infinity)
     vrndn* -> frintn    (to nearest, ties to even)
     vrndp* -> ceil      (toward plus infinity)
     vrndx* -> rint      (current rounding mode)  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrnd_f16 (float16x4_t __a)
{
return __builtin_aarch64_btruncv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrndq_f16 (float16x8_t __a)
{
return __builtin_aarch64_btruncv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrnda_f16 (float16x4_t __a)
{
return __builtin_aarch64_roundv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrndaq_f16 (float16x8_t __a)
{
return __builtin_aarch64_roundv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrndi_f16 (float16x4_t __a)
{
return __builtin_aarch64_nearbyintv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrndiq_f16 (float16x8_t __a)
{
return __builtin_aarch64_nearbyintv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrndm_f16 (float16x4_t __a)
{
return __builtin_aarch64_floorv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrndmq_f16 (float16x8_t __a)
{
return __builtin_aarch64_floorv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrndn_f16 (float16x4_t __a)
{
return __builtin_aarch64_frintnv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrndnq_f16 (float16x8_t __a)
{
return __builtin_aarch64_frintnv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrndp_f16 (float16x4_t __a)
{
return __builtin_aarch64_ceilv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrndpq_f16 (float16x8_t __a)
{
return __builtin_aarch64_ceilv8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrndx_f16 (float16x4_t __a)
{
return __builtin_aarch64_rintv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrndxq_f16 (float16x8_t __a)
{
return __builtin_aarch64_rintv8hf (__a);
}
/* vrsqrte_f16 / vrsqrteq_f16: lane-wise reciprocal square-root estimate
   (aarch64_rsqrte<mode> pattern, FRSQRTE instruction — an estimate, not
   a fully-accurate result).
   vsqrt_f16 / vsqrtq_f16: lane-wise square root (sqrt<mode>2 pattern,
   FSQRT instruction).

   Fix: every other intrinsic in this header names its parameter with
   the implementation-reserved identifier __a; these four used a plain
   `a', which a user's `#define a ...' before including this system
   header would break (C11 7.1.3 reserves __-prefixed names to the
   implementation).  Renamed to __a; calls are positional in C, so the
   interface is unchanged.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrsqrte_f16 (float16x4_t __a)
{
return __builtin_aarch64_rsqrtev4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrsqrteq_f16 (float16x8_t __a)
{
return __builtin_aarch64_rsqrtev8hf (__a);
}
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vsqrt_f16 (float16x4_t __a)
{
return __builtin_aarch64_sqrtv4hf (__a);
}
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vsqrtq_f16 (float16x8_t __a)
{
return __builtin_aarch64_sqrtv8hf (__a);
}
#pragma GCC pop_options
#undef __aarch64_vget_lane_any #undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any #undef __aarch64_vdup_lane_any
...@@ -26084,6 +26443,4 @@ __INTERLEAVE_LIST (zip) ...@@ -26084,6 +26443,4 @@ __INTERLEAVE_LIST (zip)
#undef __aarch64_vdupq_laneq_u32 #undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64 #undef __aarch64_vdupq_laneq_u64
#pragma GCC pop_options
#endif #endif
...@@ -88,11 +88,20 @@ ...@@ -88,11 +88,20 @@
;; Vector Float modes suitable for moving, loading and storing. ;; Vector Float modes suitable for moving, loading and storing.
(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF]) (define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF])
;; Vector Float modes, barring HF modes. ;; Vector Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF]) (define_mode_iterator VDQF [V2SF V4SF V2DF])
(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF])
;; Vector Float modes, and DF. ;; Vector Float modes, and DF.
(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF]) (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF DF])
(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF SF DF])
;; Vector single Float modes. ;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF]) (define_mode_iterator VDQSF [V2SF V4SF])
...@@ -366,7 +375,8 @@ ...@@ -366,7 +375,8 @@
(V4HI "") (V8HI "") (V4HI "") (V8HI "")
(V2SI "") (V4SI "") (V2SI "") (V4SI "")
(V2DI "") (V2SF "") (V2DI "") (V2SF "")
(V4SF "") (V2DF "")]) (V4SF "") (V4HF "")
(V8HF "") (V2DF "")])
;; For scalar usage of vector/FP registers, narrowing ;; For scalar usage of vector/FP registers, narrowing
(define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s") (define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s")
...@@ -447,6 +457,16 @@ ...@@ -447,6 +457,16 @@
(QI "b") (HI "h") (QI "b") (HI "h")
(SI "s") (DI "d")]) (SI "s") (DI "d")])
;; Vetype is used everywhere in scheduling type and assembly output,
;; sometimes they are not the same, for example HF modes on some
;; instructions. stype is defined to represent scheduling type
;; more accurately.
(define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
(V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
(V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
(HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
(SI "s") (DI "d")])
;; Mode-to-bitwise operation type mapping. ;; Mode-to-bitwise operation type mapping.
(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b") (define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b")
(V4HI "8b") (V8HI "16b") (V4HI "8b") (V8HI "16b")
...@@ -656,10 +676,14 @@ ...@@ -656,10 +676,14 @@
(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")
(V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf") (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
(SF "si") (DF "di") (SI "sf") (DI "df")]) (SF "si") (DF "di") (SI "sf") (DI "df")
(V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
(V8HI "v8hf")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
(V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF") (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
(SF "SI") (DF "DI") (SI "SF") (DI "DF")]) (SF "SI") (DF "DI") (SI "SF") (DI "DF")
(V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
(V8HI "V8HF")])
;; for the inequal width integer to fp conversions ;; for the inequal width integer to fp conversions
...@@ -687,6 +711,7 @@ ...@@ -687,6 +711,7 @@
;; the 'x' constraint. All other modes may use the 'w' constraint. ;; the 'x' constraint. All other modes may use the 'w' constraint.
(define_mode_attr h_con [(V2SI "w") (V4SI "w") (define_mode_attr h_con [(V2SI "w") (V4SI "w")
(V4HI "x") (V8HI "x") (V4HI "x") (V8HI "x")
(V4HF "w") (V8HF "w")
(V2SF "w") (V4SF "w") (V2SF "w") (V4SF "w")
(V2DF "w") (DF "w")]) (V2DF "w") (DF "w")])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment