Commit ff229375 by Delia Burduv Committed by Kyrylo Tkachov

ACLE intrinsics: BFloat16 store (vst<n>{q}_bf16) intrinsics for AArch32

2020-03-06  Delia Burduv  <delia.burduv@arm.com>

	* config/arm/arm_neon.h (bfloat16x4x2_t): New typedef.
	(bfloat16x8x2_t): New typedef.
	(bfloat16x4x3_t): New typedef.
	(bfloat16x8x3_t): New typedef.
	(bfloat16x4x4_t): New typedef.
	(bfloat16x8x4_t): New typedef.
	(vst2_bf16): New.
	(vst2q_bf16): New.
	(vst3_bf16): New.
	(vst3q_bf16): New.
	(vst4_bf16): New.
	(vst4q_bf16): New.
	* config/arm/arm-builtins.c (v2bf_UP): Define.
	(VAR13): New.
	(arm_init_simd_builtin_types): Init Bfloat16x2_t eltype.
	* config/arm/arm-modes.def (V2BF): New mode.
	* config/arm/arm-simd-builtin-types.def
	(Bfloat16x2_t): New entry.
	* config/arm/arm_neon_builtins.def
	(vst2): Changed to VAR13 and added v4bf, v8bf
	(vst3): Changed to VAR13 and added v4bf, v8bf
	(vst4): Changed to VAR13 and added v4bf, v8bf
	* config/arm/iterators.md (VDXBF): New iterator.
	(VQ2BF): New iterator.
	*config/arm/neon.md (neon_vst2<mode>): Used new iterators.
	(neon_vst2<mode>): Used new iterators.
	(neon_vst3<mode>): Used new iterators.
	(neon_vst3<mode>): Used new iterators.
	(neon_vst3qa<mode>): Used new iterators.
	(neon_vst3qb<mode>): Used new iterators.
	(neon_vst4<mode>): Used new iterators.
	(neon_vst4<mode>): Used new iterators.
	(neon_vst4qa<mode>): Used new iterators.
	(neon_vst4qb<mode>): Used new iterators.

	* gcc.target/arm/simd/bf16_vstn_1.c: New test.
parent 12007097
2020-03-06 Delia Burduv <delia.burduv@arm.com> 2020-03-06 Delia Burduv <delia.burduv@arm.com>
* config/arm/arm_neon.h (bfloat16x4x2_t): New typedef.
(bfloat16x8x2_t): New typedef.
(bfloat16x4x3_t): New typedef.
(bfloat16x8x3_t): New typedef.
(bfloat16x4x4_t): New typedef.
(bfloat16x8x4_t): New typedef.
(vst2_bf16): New.
(vst2q_bf16): New.
(vst3_bf16): New.
(vst3q_bf16): New.
(vst4_bf16): New.
(vst4q_bf16): New.
* config/arm/arm-builtins.c (v2bf_UP): Define.
(VAR13): New.
(arm_init_simd_builtin_types): Init Bfloat16x2_t eltype.
* config/arm/arm-modes.def (V2BF): New mode.
* config/arm/arm-simd-builtin-types.def
(Bfloat16x2_t): New entry.
* config/arm/arm_neon_builtins.def
(vst2): Changed to VAR13 and added v4bf, v8bf
(vst3): Changed to VAR13 and added v4bf, v8bf
(vst4): Changed to VAR13 and added v4bf, v8bf
* config/arm/iterators.md (VDXBF): New iterator.
(VQ2BF): New iterator.
*config/arm/neon.md (neon_vst2<mode>): Used new iterators.
(neon_vst2<mode>): Used new iterators.
(neon_vst3<mode>): Used new iterators.
(neon_vst3<mode>): Used new iterators.
(neon_vst3qa<mode>): Used new iterators.
(neon_vst3qb<mode>): Used new iterators.
(neon_vst4<mode>): Used new iterators.
(neon_vst4<mode>): Used new iterators.
(neon_vst4qa<mode>): Used new iterators.
(neon_vst4qb<mode>): Used new iterators.
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* config/aarch64/aarch64-simd-builtins.def * config/aarch64/aarch64-simd-builtins.def
(bfcvtn): New built-in function. (bfcvtn): New built-in function.
(bfcvtn_q): New built-in function. (bfcvtn_q): New built-in function.
......
...@@ -342,6 +342,7 @@ arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS] ...@@ -342,6 +342,7 @@ arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define v4bf_UP E_V4BFmode #define v4bf_UP E_V4BFmode
#define v2si_UP E_V2SImode #define v2si_UP E_V2SImode
#define v2sf_UP E_V2SFmode #define v2sf_UP E_V2SFmode
#define v2bf_UP E_V2BFmode
#define di_UP E_DImode #define di_UP E_DImode
#define v16qi_UP E_V16QImode #define v16qi_UP E_V16QImode
#define v8hi_UP E_V8HImode #define v8hi_UP E_V8HImode
...@@ -405,6 +406,9 @@ typedef struct { ...@@ -405,6 +406,9 @@ typedef struct {
#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \ #define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \ VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
VAR1 (T, N, L) VAR1 (T, N, L)
#define VAR13(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
VAR12 (T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR1 (T, N, M)
/* The builtin data can be found in arm_neon_builtins.def, arm_vfp_builtins.def /* The builtin data can be found in arm_neon_builtins.def, arm_vfp_builtins.def
and arm_acle_builtins.def. The entries in arm_neon_builtins.def require and arm_acle_builtins.def. The entries in arm_neon_builtins.def require
...@@ -1037,6 +1041,7 @@ arm_init_simd_builtin_types (void) ...@@ -1037,6 +1041,7 @@ arm_init_simd_builtin_types (void)
arm_simd_types[Float32x4_t].eltype = float_type_node; arm_simd_types[Float32x4_t].eltype = float_type_node;
/* Init Bfloat vector types with underlying __bf16 scalar type. */ /* Init Bfloat vector types with underlying __bf16 scalar type. */
arm_simd_types[Bfloat16x2_t].eltype = arm_bf16_type_node;
arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node; arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node; arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
......
...@@ -80,6 +80,7 @@ VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ ...@@ -80,6 +80,7 @@ VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
FLOAT_MODE (BF, 2, 0); FLOAT_MODE (BF, 2, 0);
ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format); ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */
VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */ VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */
VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */ VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */
......
...@@ -48,5 +48,6 @@ ...@@ -48,5 +48,6 @@
ENTRY (Float16x8_t, V8HF, none, 128, float16, 19) ENTRY (Float16x8_t, V8HF, none, 128, float16, 19)
ENTRY (Float32x4_t, V4SF, none, 128, float32, 19) ENTRY (Float32x4_t, V4SF, none, 128, float32, 19)
ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20) ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20) ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
...@@ -19382,6 +19382,36 @@ vbfdotq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, ...@@ -19382,6 +19382,36 @@ vbfdotq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b,
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+bf16") #pragma GCC target ("arch=armv8.2-a+bf16")
typedef struct bfloat16x4x2_t
{
bfloat16x4_t val[2];
} bfloat16x4x2_t;
typedef struct bfloat16x8x2_t
{
bfloat16x8_t val[2];
} bfloat16x8x2_t;
typedef struct bfloat16x4x3_t
{
bfloat16x4_t val[3];
} bfloat16x4x3_t;
typedef struct bfloat16x8x3_t
{
bfloat16x8_t val[3];
} bfloat16x8x3_t;
typedef struct bfloat16x4x4_t
{
bfloat16x4_t val[4];
} bfloat16x4x4_t;
typedef struct bfloat16x8x4_t
{
bfloat16x8_t val[4];
} bfloat16x8x4_t;
__extension__ extern __inline float32x4_t __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvt_f32_bf16 (bfloat16x4_t __a) vcvt_f32_bf16 (bfloat16x4_t __a)
...@@ -19479,6 +19509,54 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, ...@@ -19479,6 +19509,54 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
return __builtin_neon_vfmat_laneqv8bf (__r, __a, __b, __index); return __builtin_neon_vfmat_laneqv8bf (__r, __a, __b, __index);
} }
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val)
{
union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __val };
return __builtin_neon_vst2v4bf (__ptr, __bu.__o);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst2q_bf16 (bfloat16_t * __ptr, bfloat16x8x2_t __val)
{
union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __val };
return __builtin_neon_vst2v8bf (__ptr, __bu.__o);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst3_bf16 (bfloat16_t * __ptr, bfloat16x4x3_t __val)
{
union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __val };
return __builtin_neon_vst3v4bf (__ptr, __bu.__o);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst3q_bf16 (bfloat16_t * __ptr, bfloat16x8x3_t __val)
{
union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __val };
return __builtin_neon_vst3v8bf (__ptr, __bu.__o);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst4_bf16 (bfloat16_t * __ptr, bfloat16x4x4_t __val)
{
union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __val };
return __builtin_neon_vst4v4bf (__ptr, __bu.__o);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst4q_bf16 (bfloat16_t * __ptr, bfloat16x8x4_t __val)
{
union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __val };
return __builtin_neon_vst4v8bf (__ptr, __bu.__o);
}
#pragma GCC pop_options #pragma GCC pop_options
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -325,8 +325,8 @@ VAR11 (LOAD1, vld2, ...@@ -325,8 +325,8 @@ VAR11 (LOAD1, vld2,
VAR9 (LOAD1LANE, vld2_lane, VAR9 (LOAD1LANE, vld2_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR11 (STORE1, vst2, VAR13 (STORE1, vst2,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
VAR9 (STORE1LANE, vst2_lane, VAR9 (STORE1LANE, vst2_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR11 (LOAD1, vld3, VAR11 (LOAD1, vld3,
...@@ -334,8 +334,8 @@ VAR11 (LOAD1, vld3, ...@@ -334,8 +334,8 @@ VAR11 (LOAD1, vld3,
VAR9 (LOAD1LANE, vld3_lane, VAR9 (LOAD1LANE, vld3_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR11 (STORE1, vst3, VAR13 (STORE1, vst3,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
VAR9 (STORE1LANE, vst3_lane, VAR9 (STORE1LANE, vst3_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR11 (LOAD1, vld4, VAR11 (LOAD1, vld4,
...@@ -343,8 +343,8 @@ VAR11 (LOAD1, vld4, ...@@ -343,8 +343,8 @@ VAR11 (LOAD1, vld4,
VAR9 (LOAD1LANE, vld4_lane, VAR9 (LOAD1LANE, vld4_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR11 (STORE1, vst4, VAR13 (STORE1, vst4,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
VAR9 (STORE1LANE, vst4_lane, VAR9 (STORE1LANE, vst4_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR2 (TERNOP, sdot, v8qi, v16qi) VAR2 (TERNOP, sdot, v8qi, v16qi)
......
...@@ -84,6 +84,9 @@ ...@@ -84,6 +84,9 @@
;; Double-width vector modes plus 64-bit elements. ;; Double-width vector modes plus 64-bit elements.
(define_mode_iterator VDX [V8QI V4HI V4HF V4BF V2SI V2SF DI]) (define_mode_iterator VDX [V8QI V4HI V4HF V4BF V2SI V2SF DI])
;; Double-width vector modes plus 64-bit elements, including V4BF.
(define_mode_iterator VDXBF [V8QI V4HI V4HF (V4BF "TARGET_BF16_SIMD") V2SI V2SF DI])
;; Double-width vector modes plus 64-bit elements, ;; Double-width vector modes plus 64-bit elements,
;; with V4BFmode added, suitable for moves. ;; with V4BFmode added, suitable for moves.
(define_mode_iterator VDXMOV [V8QI V4HI V4HF V4BF V2SI V2SF DI]) (define_mode_iterator VDXMOV [V8QI V4HI V4HF V4BF V2SI V2SF DI])
...@@ -100,6 +103,9 @@ ...@@ -100,6 +103,9 @@
;; Quad-width vector modes, including V8HF. ;; Quad-width vector modes, including V8HF.
(define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF]) (define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF])
;; Quad-width vector modes, including V8BF.
(define_mode_iterator VQ2BF [V16QI V8HI V8HF (V8BF "TARGET_BF16_SIMD") V4SI V4SF])
;; Quad-width vector modes with 16- or 32-bit elements ;; Quad-width vector modes with 16- or 32-bit elements
(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF]) (define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF])
......
...@@ -5541,7 +5541,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -5541,7 +5541,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst2<mode>" (define_insn "neon_vst2<mode>"
[(set (match_operand:TI 0 "neon_struct_operand" "=Um") [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
(unspec:TI [(match_operand:TI 1 "s_register_operand" "w") (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))] UNSPEC_VST2))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -5566,7 +5566,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -5566,7 +5566,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst2<mode>" (define_insn "neon_vst2<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um") [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:OI 1 "s_register_operand" "w") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST2))] UNSPEC_VST2))]
"TARGET_NEON" "TARGET_NEON"
"vst2.<V_sz_elem>\t%h1, %A0" "vst2.<V_sz_elem>\t%h1, %A0"
...@@ -5810,7 +5810,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -5810,7 +5810,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst3<mode>" (define_insn "neon_vst3<mode>"
[(set (match_operand:EI 0 "neon_struct_operand" "=Um") [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:EI 1 "s_register_operand" "w") (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3))] UNSPEC_VST3))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -5837,7 +5837,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -5837,7 +5837,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vst3<mode>" (define_expand "neon_vst3<mode>"
[(match_operand:CI 0 "neon_struct_operand") [(match_operand:CI 0 "neon_struct_operand")
(match_operand:CI 1 "s_register_operand") (match_operand:CI 1 "s_register_operand")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON" "TARGET_NEON"
{ {
rtx mem; rtx mem;
...@@ -5852,7 +5852,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -5852,7 +5852,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst3qa<mode>" (define_insn "neon_vst3qa<mode>"
[(set (match_operand:EI 0 "neon_struct_operand" "=Um") [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:CI 1 "s_register_operand" "w") (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3A))] UNSPEC_VST3A))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -5871,7 +5871,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -5871,7 +5871,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst3qb<mode>" (define_insn "neon_vst3qb<mode>"
[(set (match_operand:EI 0 "neon_struct_operand" "=Um") [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
(unspec:EI [(match_operand:CI 1 "s_register_operand" "w") (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST3B))] UNSPEC_VST3B))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -6135,7 +6135,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -6135,7 +6135,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst4<mode>" (define_insn "neon_vst4<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um") [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:OI 1 "s_register_operand" "w") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
(unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4))] UNSPEC_VST4))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -6163,7 +6163,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -6163,7 +6163,7 @@ if (BYTES_BIG_ENDIAN)
(define_expand "neon_vst4<mode>" (define_expand "neon_vst4<mode>"
[(match_operand:XI 0 "neon_struct_operand") [(match_operand:XI 0 "neon_struct_operand")
(match_operand:XI 1 "s_register_operand") (match_operand:XI 1 "s_register_operand")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_NEON" "TARGET_NEON"
{ {
rtx mem; rtx mem;
...@@ -6178,7 +6178,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -6178,7 +6178,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst4qa<mode>" (define_insn "neon_vst4qa<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um") [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:XI 1 "s_register_operand" "w") (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4A))] UNSPEC_VST4A))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -6198,7 +6198,7 @@ if (BYTES_BIG_ENDIAN) ...@@ -6198,7 +6198,7 @@ if (BYTES_BIG_ENDIAN)
(define_insn "neon_vst4qb<mode>" (define_insn "neon_vst4qb<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um") [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:XI 1 "s_register_operand" "w") (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
(unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
UNSPEC_VST4B))] UNSPEC_VST4B))]
"TARGET_NEON" "TARGET_NEON"
{ {
......
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* gcc.target/arm/simd/bf16_vstn_1.c: New test.
2020-03-06 Kito Cheng <kito.cheng@sifive.com> 2020-03-06 Kito Cheng <kito.cheng@sifive.com>
* gcc.target/riscv/pr93304.c: Update expected output and comment. * gcc.target/riscv/pr93304.c: Update expected output and comment.
......
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_neon.h"
/*
**test_vst2_bf16:
** ...
** vst2.16 {d0-d1}, \[r0\]
** bx lr
*/
void
test_vst2_bf16 (bfloat16_t *ptr, bfloat16x4x2_t val)
{
vst2_bf16 (ptr, val);
}
/*
**test_vst2q_bf16:
** ...
** vst2.16 {d0-d3}, \[r0\]
** bx lr
*/
void
test_vst2q_bf16 (bfloat16_t *ptr, bfloat16x8x2_t val)
{
vst2q_bf16 (ptr, val);
}
/*
**test_vst3_bf16:
** ...
** vst3.16 {d0-d2}, \[r0\]
** bx lr
*/
void
test_vst3_bf16 (bfloat16_t *ptr, bfloat16x4x3_t val)
{
vst3_bf16 (ptr, val);
}
/*
**test_vst3q_bf16:
** ...
** vst3.16 {d17, d19, d21}, \[r0\]
** bx lr
*/
void
test_vst3q_bf16 (bfloat16_t *ptr, bfloat16x8x3_t val)
{
vst3q_bf16 (ptr, val);
}
/*
**test_vst4_bf16:
** ...
** vst4.16 {d0-d3}, \[r0\]
** bx lr
*/
void
test_vst4_bf16 (bfloat16_t *ptr, bfloat16x4x4_t val)
{
vst4_bf16 (ptr, val);
}
/*
**test_vst4q_bf16:
** ...
** vst4.16 {d1, d3, d5, d7}, \[r0\]
** bx lr
*/
void
test_vst4q_bf16 (bfloat16_t *ptr, bfloat16x8x4_t val)
{
vst4q_bf16 (ptr, val);
}
int main()
{
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment