Commit 17a13507 by Mihail Ionescu

[GCC][PATCH][ARM] Add vreinterpret, vdup, vget and vset bfloat16 intrinsics

This patch adds support for the bf16 vector create, get, set,
duplicate and reinterpret intrinsics.
ACLE documents are at https://developer.arm.com/docs/101028/latest
ISA documents are at https://developer.arm.com/docs/ddi0596/latest

gcc/ChangeLog:

2020-02-27  Mihail Ionescu  <mihail.ionescu@arm.com>

	* (__ARM_NUM_LANES, __arm_lane, __arm_lane_q): Move to the
	beginning of the file.
	(vcreate_bf16, vcombine_bf16): New.
	(vdup_n_bf16, vdupq_n_bf16): New.
	(vdup_lane_bf16, vdup_laneq_bf16): New.
	(vdupq_lane_bf16, vdupq_laneq_bf16): New.
	(vduph_lane_bf16, vduph_laneq_bf16): New.
	(vset_lane_bf16, vsetq_lane_bf16): New.
	(vget_lane_bf16, vgetq_lane_bf16): New.
	(vget_high_bf16, vget_low_bf16): New.
	(vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New.
	(vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New.
	(vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New.
	(vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New.
	(vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New.
	(vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New.
	(vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New.
	(vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New.
	(vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New.
	(vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New.
	(vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New.
	(vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New.
	(vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New.
	(vreinterpretq_bf16_p128): New.
	(vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New.
	(vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New.
	(vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New.
	(vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New.
	(vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New.
	(vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New.
	(vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New.
	(vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New.
	(vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New.
	(vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New.
	(vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New.
	(vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New.
	(vreinterpretq_p128_bf16): New.
	* config/arm/arm_neon_builtins.def (VDX): Add V4BF.
	(V_elem): Likewise.
	(V_elem_l): Likewise.
	(VD_LANE): Likewise.
	(VQX) Add V8BF.
	(V_DOUBLE): Likewise.
	(VDQX): Add V4BF and V8BF.
	(V_two_elem, V_three_elem, V_four_elem): Likewise.
	(V_reg): Likewise.
	(V_HALF): Likewise.
	(V_double_vector_mode): Likewise.
	(V_cmp_result): Likewise.
	(V_uf_sclr): Likewise.
	(V_sz_elem): Likewise.
	(Is_d_reg): Likewise.
	(V_mode_nunits): Likewise.
	* config/arm/neon.md (neon_vdup_lane): Enable for BFloat.

gcc/testsuite/ChangeLog:

2020-02-27  Mihail Ionescu  <mihail.ionescu@arm.com>

	* gcc.target/arm/bf16_dup.c: New test.
	* gcc.target/arm/bf16_reinterpret.c: Likewise.
parent dc941ea9
2020-02-27 Mihail Ionescu <mihail.ionescu@arm.com>
* (__ARM_NUM_LANES, __arm_lane, __arm_lane_q): Move to the
beginning of the file.
(vcreate_bf16, vcombine_bf16): New.
(vdup_n_bf16, vdupq_n_bf16): New.
(vdup_lane_bf16, vdup_laneq_bf16): New.
(vdupq_lane_bf16, vdupq_laneq_bf16): New.
(vduph_lane_bf16, vduph_laneq_bf16): New.
(vset_lane_bf16, vsetq_lane_bf16): New.
(vget_lane_bf16, vgetq_lane_bf16): New.
(vget_high_bf16, vget_low_bf16): New.
(vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New.
(vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New.
(vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New.
(vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New.
(vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New.
(vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New.
(vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New.
(vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New.
(vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New.
(vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New.
(vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New.
(vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New.
(vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New.
(vreinterpretq_bf16_p128): New.
(vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New.
(vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New.
(vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New.
(vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New.
(vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New.
(vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New.
(vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New.
(vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New.
(vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New.
(vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New.
(vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New.
(vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New.
(vreinterpretq_p128_bf16): New.
* config/arm/arm_neon_builtins.def (VDX): Add V4BF.
(V_elem): Likewise.
(V_elem_l): Likewise.
(VD_LANE): Likewise.
(VQX) Add V8BF.
(V_DOUBLE): Likewise.
(VDQX): Add V4BF and V8BF.
(V_two_elem, V_three_elem, V_four_elem): Likewise.
(V_reg): Likewise.
(V_HALF): Likewise.
(V_double_vector_mode): Likewise.
(V_cmp_result): Likewise.
(V_uf_sclr): Likewise.
(V_sz_elem): Likewise.
(Is_d_reg): Likewise.
(V_mode_nunits): Likewise.
* config/arm/neon.md (neon_vdup_lane): Enable for BFloat16.
2020-02-27 Andrew Stubbs <ams@codesourcery.com> 2020-02-27 Andrew Stubbs <ams@codesourcery.com>
* config/gcn/gcn-valu.md (VEC_SUBDWORD_MODE): New mode iterator. * config/gcn/gcn-valu.md (VEC_SUBDWORD_MODE): New mode iterator.
......
...@@ -221,13 +221,13 @@ VAR10 (SETLANE, vset_lane, ...@@ -221,13 +221,13 @@ VAR10 (SETLANE, vset_lane,
VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di) VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di)
VAR10 (UNOP, vdup_n, VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (UNOP, vdup_n, v8hf, v4hf) VAR4 (UNOP, vdup_n, v8hf, v4hf, v8bf, v4bf)
VAR10 (GETLANE, vdup_lane, VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR2 (GETLANE, vdup_lane, v8hf, v4hf) VAR4 (GETLANE, vdup_lane, v8hf, v4hf, v8bf, v4bf)
VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di) VAR7 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf)
VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR7 (UNOP, vget_high, v16qi, v8hi, v8hf, v8bf, v4si, v4sf, v2di)
VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR7 (UNOP, vget_low, v16qi, v8hi, v8hf, v8bf, v4si, v4sf, v2di)
VAR3 (UNOP, vmovn, v8hi, v4si, v2di) VAR3 (UNOP, vmovn, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovns, v8hi, v4si, v2di) VAR3 (UNOP, vqmovns, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di) VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di)
......
...@@ -82,14 +82,14 @@ ...@@ -82,14 +82,14 @@
(define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI]) (define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI])
;; Double-width vector modes plus 64-bit elements. ;; Double-width vector modes plus 64-bit elements.
(define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI]) (define_mode_iterator VDX [V8QI V4HI V4HF V4BF V2SI V2SF DI])
;; Double-width vector modes plus 64-bit elements, ;; Double-width vector modes plus 64-bit elements,
;; with V4BFmode added, suitable for moves. ;; with V4BFmode added, suitable for moves.
(define_mode_iterator VDXMOV [V8QI V4HI V4HF V4BF V2SI V2SF DI]) (define_mode_iterator VDXMOV [V8QI V4HI V4HF V4BF V2SI V2SF DI])
;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane. ;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane.
(define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF]) (define_mode_iterator VD_LANE [V8QI V4HI V4HF V4BF V2SI V2SF])
;; Double-width vector modes without floating-point elements. ;; Double-width vector modes without floating-point elements.
(define_mode_iterator VDI [V8QI V4HI V2SI]) (define_mode_iterator VDI [V8QI V4HI V2SI])
...@@ -104,7 +104,7 @@ ...@@ -104,7 +104,7 @@
(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF]) (define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF])
;; Quad-width vector modes plus 64-bit elements. ;; Quad-width vector modes plus 64-bit elements.
(define_mode_iterator VQX [V16QI V8HI V8HF V4SI V4SF V2DI]) (define_mode_iterator VQX [V16QI V8HI V8HF V8BF V4SI V4SF V2DI])
;; Quad-width vector modes without floating-point elements. ;; Quad-width vector modes without floating-point elements.
(define_mode_iterator VQI [V16QI V8HI V4SI]) (define_mode_iterator VQI [V16QI V8HI V4SI])
...@@ -153,7 +153,7 @@ ...@@ -153,7 +153,7 @@
;; Vector modes, including 64-bit integer elements. ;; Vector modes, including 64-bit integer elements.
(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI (define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF DI V2DI]) V4HF V8HF V4BF V8BF V2SF V4SF DI V2DI])
;; Vector modes including 64-bit integer elements, but no floats. ;; Vector modes including 64-bit integer elements, but no floats.
(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
...@@ -522,6 +522,7 @@ ...@@ -522,6 +522,7 @@
(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
(V4HI "HI") (V8HI "HI") (V4HI "HI") (V8HI "HI")
(V4HF "HF") (V8HF "HF") (V4HF "HF") (V8HF "HF")
(V4BF "BF") (V8BF "BF")
(V2SI "SI") (V4SI "SI") (V2SI "SI") (V4SI "SI")
(V2SF "SF") (V4SF "SF") (V2SF "SF") (V4SF "SF")
(DI "DI") (V2DI "DI")]) (DI "DI") (V2DI "DI")])
...@@ -530,6 +531,7 @@ ...@@ -530,6 +531,7 @@
(define_mode_attr V_elem_l [(V8QI "qi") (V16QI "qi") (define_mode_attr V_elem_l [(V8QI "qi") (V16QI "qi")
(V4HI "hi") (V8HI "hi") (V4HI "hi") (V8HI "hi")
(V4HF "hf") (V8HF "hf") (V4HF "hf") (V8HF "hf")
(V4BF "bf") (V8BF "bf")
(V2SI "si") (V4SI "si") (V2SI "si") (V4SI "si")
(V2SF "sf") (V4SF "sf") (V2SF "sf") (V4SF "sf")
(DI "di") (V2DI "di")]) (DI "di") (V2DI "di")])
...@@ -547,6 +549,7 @@ ...@@ -547,6 +549,7 @@
(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") (define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
(V4HI "SI") (V8HI "SI") (V4HI "SI") (V8HI "SI")
(V4HF "SF") (V8HF "SF") (V4HF "SF") (V8HF "SF")
(V4BF "BF") (V8BF "BF")
(V2SI "V2SI") (V4SI "V2SI") (V2SI "V2SI") (V4SI "V2SI")
(V2SF "V2SF") (V4SF "V2SF") (V2SF "V2SF") (V4SF "V2SF")
(DI "V2DI") (V2DI "V2DI")]) (DI "V2DI") (V2DI "V2DI")])
...@@ -567,6 +570,7 @@ ...@@ -567,6 +570,7 @@
(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") (define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK")
(V4HI "BLK") (V8HI "BLK") (V4HI "BLK") (V8HI "BLK")
(V4HF "BLK") (V8HF "BLK") (V4HF "BLK") (V8HF "BLK")
(V4BF "BLK") (V8BF "BLK")
(V2SI "BLK") (V4SI "BLK") (V2SI "BLK") (V4SI "BLK")
(V2SF "BLK") (V4SF "BLK") (V2SF "BLK") (V4SF "BLK")
(DI "EI") (V2DI "EI")]) (DI "EI") (V2DI "EI")])
...@@ -575,6 +579,7 @@ ...@@ -575,6 +579,7 @@
(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") (define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI")
(V4HI "V4HI") (V8HI "V4HI") (V4HI "V4HI") (V8HI "V4HI")
(V4HF "V4HF") (V8HF "V4HF") (V4HF "V4HF") (V8HF "V4HF")
(V4BF "V4BF") (V8BF "V4BF")
(V2SI "V4SI") (V4SI "V4SI") (V2SI "V4SI") (V4SI "V4SI")
(V2SF "V4SF") (V4SF "V4SF") (V2SF "V4SF") (V4SF "V4SF")
(DI "OI") (V2DI "OI")]) (DI "OI") (V2DI "OI")])
...@@ -583,6 +588,7 @@ ...@@ -583,6 +588,7 @@
(define_mode_attr V_reg [(V8QI "P") (V16QI "q") (define_mode_attr V_reg [(V8QI "P") (V16QI "q")
(V4HI "P") (V8HI "q") (V4HI "P") (V8HI "q")
(V4HF "P") (V8HF "q") (V4HF "P") (V8HF "q")
(V4BF "P") (V8BF "q")
(V2SI "P") (V4SI "q") (V2SI "P") (V4SI "q")
(V2SF "P") (V4SF "q") (V2SF "P") (V4SF "q")
(DI "P") (V2DI "q") (DI "P") (V2DI "q")
...@@ -613,7 +619,8 @@ ...@@ -613,7 +619,8 @@
(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") (define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
(V8HF "V4HF") (V4SI "V2SI") (V8HF "V4HF") (V4SI "V2SI")
(V4SF "V2SF") (V2DF "DF") (V4SF "V2SF") (V2DF "DF")
(V2DI "DI") (V4HF "HF")]) (V2DI "DI") (V4HF "HF")
(V4BF "BF") (V8BF "V4BF")])
;; Same, but lower-case. ;; Same, but lower-case.
(define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi") (define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi")
...@@ -624,7 +631,7 @@ ...@@ -624,7 +631,7 @@
(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI") (define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
(V2SI "V4SI") (V4HF "V8HF") (V2SI "V4SI") (V4HF "V8HF")
(V2SF "V4SF") (DF "V2DF") (V2SF "V4SF") (DF "V2DF")
(DI "V2DI")]) (DI "V2DI") (V4BF "V8BF")])
;; Same, but lower-case. ;; Same, but lower-case.
(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi") (define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi")
...@@ -643,6 +650,7 @@ ...@@ -643,6 +650,7 @@
(V4SI "V2SI") (V4SF "V2SF") (V4SI "V2SI") (V4SF "V2SF")
(V8QI "V8QI") (V4HI "V4HI") (V8QI "V8QI") (V4HI "V4HI")
(V2SI "V2SI") (V2SF "V2SF") (V2SI "V2SI") (V2SF "V2SF")
(V8BF "V4BF") (V4BF "V4BF")
(V8HF "V4HF") (V4HF "V4HF")]) (V8HF "V4HF") (V4HF "V4HF")])
;; Mode of result of comparison operations (and bit-select operand 1). ;; Mode of result of comparison operations (and bit-select operand 1).
...@@ -650,6 +658,7 @@ ...@@ -650,6 +658,7 @@
(V4HI "V4HI") (V8HI "V8HI") (V4HI "V4HI") (V8HI "V8HI")
(V2SI "V2SI") (V4SI "V4SI") (V2SI "V2SI") (V4SI "V4SI")
(V4HF "V4HI") (V8HF "V8HI") (V4HF "V4HI") (V8HF "V8HI")
(V4BF "V4HI") (V8BF "V8HI")
(V2SF "V2SI") (V4SF "V4SI") (V2SF "V2SI") (V4SF "V4SI")
(DI "DI") (V2DI "V2DI")]) (DI "DI") (V2DI "V2DI")])
...@@ -691,6 +700,7 @@ ...@@ -691,6 +700,7 @@
(V4HI "u16") (V8HI "u16") (V4HI "u16") (V8HI "u16")
(V2SI "32") (V4SI "32") (V2SI "32") (V4SI "32")
(V4HF "u16") (V8HF "u16") (V4HF "u16") (V8HF "u16")
(V4BF "u16") (V8BF "u16")
(V2SF "32") (V4SF "32")]) (V2SF "32") (V4SF "32")])
(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8") (define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8")
...@@ -698,6 +708,7 @@ ...@@ -698,6 +708,7 @@
(V2SI "32") (V4SI "32") (V2SI "32") (V4SI "32")
(DI "64") (V2DI "64") (DI "64") (V2DI "64")
(V4HF "16") (V8HF "16") (V4HF "16") (V8HF "16")
(V4BF "16") (V8BF "16")
(V2SF "32") (V4SF "32")]) (V2SF "32") (V4SF "32")])
(define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b") (define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b")
...@@ -768,10 +779,12 @@ ...@@ -768,10 +779,12 @@
(V2SI "true") (V4SI "false") (V2SI "true") (V4SI "false")
(V2SF "true") (V4SF "false") (V2SF "true") (V4SF "false")
(DI "true") (V2DI "false") (DI "true") (V2DI "false")
(V4BF "true") (V8BF "false")
(V4HF "true") (V8HF "false")]) (V4HF "true") (V8HF "false")])
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
(V4HF "4") (V8HF "8") (V4HF "4") (V8HF "8")
(V4BF "4") (V8BF "8")
(V4HI "4") (V8HI "8") (V4HI "4") (V8HI "8")
(V2SI "2") (V4SI "4") (V2SI "2") (V4SI "4")
(V2SF "2") (V4SF "4") (V2SF "2") (V4SF "4")
......
...@@ -3737,6 +3737,22 @@ if (BYTES_BIG_ENDIAN) ...@@ -3737,6 +3737,22 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_from_gp_q")] [(set_attr "type" "neon_from_gp_q")]
) )
(define_insn "neon_vdup_nv4bf"
[(set (match_operand:V4BF 0 "s_register_operand" "=w")
(vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
"TARGET_NEON"
"vdup.16\t%P0, %1"
[(set_attr "type" "neon_from_gp")]
)
(define_insn "neon_vdup_nv8bf"
[(set (match_operand:V8BF 0 "s_register_operand" "=w")
(vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
"TARGET_NEON"
"vdup.16\t%q0, %1"
[(set_attr "type" "neon_from_gp_q")]
)
(define_insn "neon_vdup_n<mode>" (define_insn "neon_vdup_n<mode>"
[(set (match_operand:V32 0 "s_register_operand" "=w,w") [(set (match_operand:V32 0 "s_register_operand" "=w,w")
(vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))] (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
...@@ -3791,12 +3807,12 @@ if (BYTES_BIG_ENDIAN) ...@@ -3791,12 +3807,12 @@ if (BYTES_BIG_ENDIAN)
) )
(define_insn "neon_vdup_lane<mode>_internal" (define_insn "neon_vdup_lane<mode>_internal"
[(set (match_operand:VH 0 "s_register_operand" "=w") [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
(vec_duplicate:VH (vec_duplicate:VHFBF
(vec_select:<V_elem> (vec_select:<V_elem>
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON && TARGET_FP16" "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{ {
if (BYTES_BIG_ENDIAN) if (BYTES_BIG_ENDIAN)
{ {
...@@ -3832,10 +3848,10 @@ if (BYTES_BIG_ENDIAN) ...@@ -3832,10 +3848,10 @@ if (BYTES_BIG_ENDIAN)
}) })
(define_expand "neon_vdup_lane<mode>" (define_expand "neon_vdup_lane<mode>"
[(match_operand:VH 0 "s_register_operand") [(match_operand:VHFBF 0 "s_register_operand")
(match_operand:<V_double_vector_mode> 1 "s_register_operand") (match_operand:<V_double_vector_mode> 1 "s_register_operand")
(match_operand:SI 2 "immediate_operand")] (match_operand:SI 2 "immediate_operand")]
"TARGET_NEON && TARGET_FP16" "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{ {
if (BYTES_BIG_ENDIAN) if (BYTES_BIG_ENDIAN)
{ {
......
2020-02-27 Mihail Ionescu <mihail.ionescu@arm.com>
* gcc.target/arm/bf16_dup.c: New test.
* gcc.target/arm/bf16_reinterpret.c: Likewise.
2020-02-27 Will Schmidt <will_schmidt@vnet.ibm.com> 2020-02-27 Will Schmidt <will_schmidt@vnet.ibm.com>
* lib/target_supports.exp (check_effective_target_has_arch_pwr5): New. * lib/target_supports.exp (check_effective_target_has_arch_pwr5): New.
......
/* { dg-do assemble { target { arm*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+fp16 -mfloat-abi=softfp" } */
#include "arm_neon.h"
float32x2_t
test_vbfdot_vcreate (float32x2_t r, uint64_t a, uint64_t b)
{
bfloat16x4_t _a = vcreate_bf16(a);
bfloat16x4_t _b = vcreate_bf16(b);
return vbfdot_f32 (r, _a, _b);
}
/* { dg-final { scan-assembler {vdot.bf16\td[0-9]+, d[0-9]+, d[0-9]+} } } */
bfloat16x8_t test_vcombine_bf16 (bfloat16x4_t a, bfloat16x4_t b)
{
return vcombine_bf16 (a, b);
}
bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a)
{
return vget_high_bf16 (a);
}
bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a)
{
return vget_low_bf16 (a);
}
bfloat16_t test_vget_lane_bf16 (bfloat16x4_t a)
{
return vget_lane_bf16 (a, 1);
}
bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t a)
{
return vgetq_lane_bf16 (a, 7);
}
bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a, bfloat16x4_t b)
{
return vset_lane_bf16 (a, b, 1);
}
bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a, bfloat16x8_t b)
{
return vsetq_lane_bf16 (a, b, 7);
}
bfloat16x4_t vdup_test (bfloat16_t a)
{
return vdup_n_bf16 (a);
}
/* { dg-final { scan-assembler {vdup\.16\td[0-9]+, r[0-9]+} } } */
bfloat16x8_t vdupq_test (bfloat16_t a)
{
return vdupq_n_bf16 (a);
}
/* { dg-final { scan-assembler {vdup\.16\tq[0-9]+, r[0-9]+} } } */
bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t a)
{
return vdup_lane_bf16 (a, 1);
}
/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, d[0-9]+\[1\]} 1 } } */
bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t a)
{
return vdupq_lane_bf16 (a, 1);
}
/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, d[0-9]+\[1\]} 1 } } */
bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t a)
{
return vdup_laneq_bf16 (a, 3);
}
bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t a)
{
return vdupq_laneq_bf16 (a, 3);
}
bfloat16_t test_vduph_lane_bf16 (bfloat16x4_t a)
{
return vduph_lane_bf16 (a, 1);
}
bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t a)
{
return vduph_laneq_bf16 (a, 7);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment