Commit 1f520d34 by Delia Burduv Committed by Richard Sandiford

aarch64: ACLE intrinsics for BFCVTN, BFCVTN2 and BFCVT

This patch adds the Armv8.6-a ACLE intrinsics for bfcvtn, bfcvtn2 and
bfcvt as part of the BFloat16 extension.
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics)
The intrinsics are declared in arm_bf16.h and arm_neon.h and the RTL
patterns are defined in aarch64-simd.md.

2020-03-06  Delia Burduv  <delia.burduv@arm.com>

gcc/
	* config/aarch64/aarch64-simd-builtins.def
	(bfcvtn): New built-in function.
	(bfcvtn_q): New built-in function.
	(bfcvtn2): New built-in function.
	(bfcvt): New built-in function.
	* config/aarch64/aarch64-simd.md
	(aarch64_bfcvtn<q><mode>): New pattern.
	(aarch64_bfcvtn2v8bf): New pattern.
	(aarch64_bfcvtbf): New pattern.
	* config/aarch64/arm_bf16.h (float32_t): New typedef.
	(vcvth_bf16_f32): New intrinsic.
	* config/aarch64/arm_bf16.h (vcvt_bf16_f32): New intrinsic.
	(vcvtq_low_bf16_f32): New intrinsic.
	(vcvtq_high_bf16_f32): New intrinsic.
	* config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator.
	(UNSPEC_BFCVTN): New UNSPEC.
	(UNSPEC_BFCVTN2): New UNSPEC.
	(UNSPEC_BFCVT): New UNSPEC.
	* config/arm/types.md (bf_cvt): New type.

gcc/testsuite/
	* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New test.
parent 655e5c29
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* config/aarch64/aarch64-simd-builtins.def
(bfcvtn): New built-in function.
(bfcvtn_q): New built-in function.
(bfcvtn2): New built-in function.
(bfcvt): New built-in function.
* config/aarch64/aarch64-simd.md
(aarch64_bfcvtn<q><mode>): New pattern.
(aarch64_bfcvtn2v8bf): New pattern.
(aarch64_bfcvtbf): New pattern.
* config/aarch64/arm_bf16.h (float32_t): New typedef.
(vcvth_bf16_f32): New intrinsic.
* config/aarch64/arm_bf16.h (vcvt_bf16_f32): New intrinsic.
(vcvtq_low_bf16_f32): New intrinsic.
(vcvtq_high_bf16_f32): New intrinsic.
* config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator.
(UNSPEC_BFCVTN): New UNSPEC.
(UNSPEC_BFCVTN2): New UNSPEC.
(UNSPEC_BFCVT): New UNSPEC.
* config/arm/types.md (bf_cvt): New type.
2020-03-05 H.J. Lu <hongjiu.lu@intel.com> 2020-03-05 H.J. Lu <hongjiu.lu@intel.com>
PR target/89229 PR target/89229
......
...@@ -714,3 +714,9 @@ ...@@ -714,3 +714,9 @@
VAR1 (TERNOP, simd_smmla, 0, v16qi) VAR1 (TERNOP, simd_smmla, 0, v16qi)
VAR1 (TERNOPU, simd_ummla, 0, v16qi) VAR1 (TERNOPU, simd_ummla, 0, v16qi)
VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi) VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi)
/* Implemented by aarch64_bfcvtn{q}{2}<mode> */
VAR1 (UNOP, bfcvtn, 0, v4bf)
VAR1 (UNOP, bfcvtn_q, 0, v8bf)
VAR1 (BINOP, bfcvtn2, 0, v8bf)
VAR1 (UNOP, bfcvt, 0, bf)
...@@ -7207,3 +7207,32 @@ ...@@ -7207,3 +7207,32 @@
"<sur>mmla\\t%0.4s, %2.16b, %3.16b" "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
[(set_attr "type" "neon_mla_s_q")] [(set_attr "type" "neon_mla_s_q")]
) )
;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
[(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
UNSPEC_BFCVTN))]
"TARGET_BF16_SIMD"
"bfcvtn\\t%0.4h, %1.4s"
[(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
(define_insn "aarch64_bfcvtn2v8bf"
[(set (match_operand:V8BF 0 "register_operand" "=w")
(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "w")]
UNSPEC_BFCVTN2))]
"TARGET_BF16_SIMD"
"bfcvtn2\\t%0.8h, %2.4s"
[(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
(define_insn "aarch64_bfcvtbf"
[(set (match_operand:BF 0 "register_operand" "=w")
(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
UNSPEC_BFCVT))]
"TARGET_BF16_FP"
"bfcvt\\t%h0, %s1"
[(set_attr "type" "f_cvt")]
)
...@@ -28,5 +28,18 @@ ...@@ -28,5 +28,18 @@
#define _AARCH64_BF16_H_ #define _AARCH64_BF16_H_
typedef __bf16 bfloat16_t; typedef __bf16 bfloat16_t;
typedef float float32_t;
#pragma GCC push_options
#pragma GCC target ("+nothing+bf16+nosimd")
__extension__ extern __inline bfloat16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_bf16_f32 (float32_t __a)
{
return __builtin_aarch64_bfcvtbf (__a);
}
#pragma GCC pop_options
#endif #endif
...@@ -35634,6 +35634,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, ...@@ -35634,6 +35634,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
} }
__extension__ extern __inline bfloat16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvt_bf16_f32 (float32x4_t __a)
{
return __builtin_aarch64_bfcvtnv4bf (__a);
}
__extension__ extern __inline bfloat16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtq_low_bf16_f32 (float32x4_t __a)
{
return __builtin_aarch64_bfcvtn_qv8bf (__a);
}
__extension__ extern __inline bfloat16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
{
return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
}
#pragma GCC pop_options #pragma GCC pop_options
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */ /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
...@@ -462,6 +462,9 @@ ...@@ -462,6 +462,9 @@
;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements.
(define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
;; Bfloat16 modes to which V4SF can be converted
(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
;; ------------------------------------------------------------------ ;; ------------------------------------------------------------------
;; Unspec enumerations for Advance SIMD. These could well go into ;; Unspec enumerations for Advance SIMD. These could well go into
;; aarch64.md but for their use in int_iterators here. ;; aarch64.md but for their use in int_iterators here.
...@@ -828,6 +831,9 @@ ...@@ -828,6 +831,9 @@
UNSPEC_BFMLALB ; Used in aarch64-sve.md. UNSPEC_BFMLALB ; Used in aarch64-sve.md.
UNSPEC_BFMLALT ; Used in aarch64-sve.md. UNSPEC_BFMLALT ; Used in aarch64-sve.md.
UNSPEC_BFMMLA ; Used in aarch64-sve.md. UNSPEC_BFMMLA ; Used in aarch64-sve.md.
UNSPEC_BFCVTN ; Used in aarch64-simd.md.
UNSPEC_BFCVTN2 ; Used in aarch64-simd.md.
UNSPEC_BFCVT ; Used in aarch64-simd.md.
]) ])
;; ------------------------------------------------------------------ ;; ------------------------------------------------------------------
......
2020-03-06 Delia Burduv <delia.burduv@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test.
* gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New test.
* gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New test.
* gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New test.
2020-03-06 Kito Cheng <kito.cheng@sifive.com> 2020-03-06 Kito Cheng <kito.cheng@sifive.com>
PR tree-optimization/90883 PR tree-optimization/90883
......
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps" } */
/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
#include <arm_neon.h>
/*
**test_bfcvtn:
** bfcvtn v0.4h, v0.4s
** ret
*/
bfloat16x4_t test_bfcvtn (float32x4_t a)
{
return vcvt_bf16_f32 (a);
}
/*
**test_bfcvtnq:
** bfcvtn v0.4h, v0.4s
** ret
*/
bfloat16x8_t test_bfcvtnq (float32x4_t a)
{
return vcvtq_low_bf16_f32 (a);
}
/*
**test_bfcvtnq2:
** bfcvtn2 v0.8h, v1.4s
** ret
*/
bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a)
{
return vcvtq_high_bf16_f32 (inactive, a);
}
/*
**test_bfcvt:
** bfcvt h0, s0
** ret
*/
bfloat16_t test_bfcvt (float32_t a)
{
return vcvth_bf16_f32 (a);
}
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-additional-options "-march=armv8.2-a+nobf16" } */
#include <arm_neon.h>
bfloat16_t test_bfcvt (float32_t a)
{
/* { dg-error "inlining failed .* 'vcvth_bf16_f32" "" { target *-*-* } 0 } */
return vcvth_bf16_f32 (a);
}
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target aarch64_asm_bf16_ok } */
/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */
/* { dg-final { check-function-bodies "**" "" "-O[^0]" } } */
#include <arm_neon.h>
/*
**test_bfcvt:
** bfcvt h0, s0
** ret
*/
bfloat16_t test_bfcvt (float32_t a)
{
return vcvth_bf16_f32 (a);
}
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-additional-options "-march=armv8.2-a+nobf16" } */
#include <arm_neon.h>
bfloat16x4_t test_bfcvtn (float32x4_t a)
{
/* { dg-error "inlining failed .* 'vcvt_bf16_f32" "" { target *-*-* } 0 } */
return vcvt_bf16_f32 (a);
}
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps" } */
/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
#include <arm_neon.h>
/*
**test_bfcvtnq2_untied:
** mov v0.16b, v1.16b
** bfcvtn2 v0.8h, v2.4s
** ret
*/
bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive,
float32x4_t a)
{
return vcvtq_high_bf16_f32 (inactive, a);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment