Commit 8ea6c1b8 by Mihail Ionescu Committed by Richard Sandiford

aarch64: Add bfloat16 vdup and vreinterpret ACLE intrinsics

This patch adds support for the bf16 duplicate and reinterpret intrinsics.
ACLE documents are at https://developer.arm.com/docs/101028/latest
ISA documents are at https://developer.arm.com/docs/ddi0596/latest

2020-02-25  Mihail Ionescu  <mihail.ionescu@arm.com>

gcc/
	* config/aarch64/iterators.md (VDQF_F16) Add V4BF and V8BF.
	(VALL_F16): Likewise.
	(VALLDI_F16): Likewise.
	(Vtype): Likewise.
	(Vetype): Likewise.
	(vswap_width_name): Likewise.
	(VSWAP_WIDTH): Likewise.
	(Vel): Likewise.
	(VEL): Likewise.
	(q): Likewise.
	* config/aarch64/arm_neon.h (vset_lane_bf16, vsetq_lane_bf16): New.
	(vget_lane_bf16, vgetq_lane_bf16): New.
	(vcreate_bf16): New.
	(vdup_n_bf16, vdupq_n_bf16): New.
	(vdup_lane_bf16, vdup_laneq_bf16): New.
	(vdupq_lane_bf16, vdupq_laneq_bf16): New.
	(vduph_lane_bf16, vduph_laneq_bf16): New.
	(vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New.
	(vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New.
	(vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New.
	(vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New.
	(vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New.
	(vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New.
	(vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New.
	(vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New.
	(vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New.
	(vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New.
	(vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New
	(vreinterpret_bf16_f16, vreinterpretq_bf16_f16): New
	(vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New.
	(vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New.
	(vreinterpretq_bf16_p128): New.
	(vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New.
	(vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New.
	(vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New.
	(vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New.
	(vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New.
	(vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New.
	(vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New.
	(vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New.
	(vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New.
	(vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New.
	(vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New.
	(vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New.
	(vreinterpret_f64_bf16,vreinterpretq_f64_bf16): New.
	(vreinterpret_f16_bf16,vreinterpretq_f16_bf16): New.
	(vreinterpretq_p128_bf16): New.

gcc/testsuite/
	* gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c: New test.
parent 76a8c0f6
2020-02-25 Mihail Ionescu <mihail.ionescu@arm.com>
* config/aarch64/iterators.md (VDQF_F16) Add V4BF and V8BF.
(VALL_F16): Likewise.
(VALLDI_F16): Likewise.
(Vtype): Likewise.
(Vetype): Likewise.
(vswap_width_name): Likewise.
(VSWAP_WIDTH): Likewise.
(Vel): Likewise.
(VEL): Likewise.
(q): Likewise.
* config/aarch64/arm_neon.h (vset_lane_bf16, vsetq_lane_bf16): New.
(vget_lane_bf16, vgetq_lane_bf16): New.
(vcreate_bf16): New.
(vdup_n_bf16, vdupq_n_bf16): New.
(vdup_lane_bf16, vdup_laneq_bf16): New.
(vdupq_lane_bf16, vdupq_laneq_bf16): New.
(vduph_lane_bf16, vduph_laneq_bf16): New.
(vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New.
(vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New.
(vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New.
(vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New.
(vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New.
(vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New.
(vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New.
(vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New.
(vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New.
(vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New.
(vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New
(vreinterpret_bf16_f16, vreinterpretq_bf16_f16): New
(vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New.
(vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New.
(vreinterpretq_bf16_p128): New.
(vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New.
(vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New.
(vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New.
(vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New.
(vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New.
(vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New.
(vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New.
(vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New.
(vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New.
(vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New.
(vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New.
(vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New.
(vreinterpret_f64_bf16,vreinterpretq_f64_bf16): New.
(vreinterpret_f16_bf16,vreinterpretq_f16_bf16): New.
(vreinterpretq_p128_bf16): New.
2020-02-25 Dennis Zhang <dennis.zhang@arm.com> 2020-02-25 Dennis Zhang <dennis.zhang@arm.com>
* config/arm/arm_neon.h (vbfdot_f32, vbfdotq_f32): New * config/arm/arm_neon.h (vbfdot_f32, vbfdotq_f32): New
......
...@@ -139,7 +139,8 @@ ...@@ -139,7 +139,8 @@
(define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) (define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
;; Advanced SIMD Float modes suitable for moving, loading and storing. ;; Advanced SIMD Float modes suitable for moving, loading and storing.
(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF]) (define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF
V4BF V8BF])
;; Advanced SIMD Float modes. ;; Advanced SIMD Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF]) (define_mode_iterator VDQF [V2SF V4SF V2DF])
...@@ -180,7 +181,7 @@ ...@@ -180,7 +181,7 @@
;; All Advanced SIMD modes suitable for moving, loading, and storing. ;; All Advanced SIMD modes suitable for moving, loading, and storing.
(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
V4HF V8HF V2SF V4SF V2DF]) V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
;; All Advanced SIMD modes suitable for moving, loading, and storing, ;; All Advanced SIMD modes suitable for moving, loading, and storing,
;; including special Bfloat vector types. ;; including special Bfloat vector types.
...@@ -196,7 +197,7 @@ ...@@ -196,7 +197,7 @@
;; All Advanced SIMD modes and DI. ;; All Advanced SIMD modes and DI.
(define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI (define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
V4HF V8HF V2SF V4SF V2DF DI]) V4HF V8HF V4BF V8BF V2SF V4SF V2DF DI])
;; All Advanced SIMD modes, plus DI and DF. ;; All Advanced SIMD modes, plus DI and DF.
(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI (define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI
...@@ -972,6 +973,7 @@ ...@@ -972,6 +973,7 @@
(define_mode_attr Vtype [(V8QI "8b") (V16QI "16b") (define_mode_attr Vtype [(V8QI "8b") (V16QI "16b")
(V4HI "4h") (V8HI "8h") (V4HI "4h") (V8HI "8h")
(V4BF "4h") (V8BF "8h")
(V2SI "2s") (V4SI "4s") (V2SI "2s") (V4SI "4s")
(DI "1d") (DF "1d") (DI "1d") (DF "1d")
(V2DI "2d") (V2SF "2s") (V2DI "2d") (V2SF "2s")
...@@ -1015,6 +1017,7 @@ ...@@ -1015,6 +1017,7 @@
(VNx4SF "s") (VNx2SF "s") (VNx4SF "s") (VNx2SF "s")
(VNx2DI "d") (VNx2DI "d")
(VNx2DF "d") (VNx2DF "d")
(BF "h") (V4BF "h") (V8BF "h")
(HF "h") (HF "h")
(SF "s") (DF "d") (SF "s") (DF "d")
(QI "b") (HI "h") (QI "b") (HI "h")
...@@ -1083,6 +1086,7 @@ ...@@ -1083,6 +1086,7 @@
(DF "DF") (V2DF "DF") (DF "DF") (V2DF "DF")
(SI "SI") (HI "HI") (SI "SI") (HI "HI")
(QI "QI") (QI "QI")
(V4BF "BF") (V8BF "BF")
(VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI") (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
(VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI") (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
(VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF") (VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
...@@ -1102,6 +1106,7 @@ ...@@ -1102,6 +1106,7 @@
(V2DF "df") (DF "df") (V2DF "df") (DF "df")
(SI "si") (HI "hi") (SI "si") (HI "hi")
(QI "qi") (QI "qi")
(V4BF "bf") (V8BF "bf")
(VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi") (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
(VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi") (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
(VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf") (VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
...@@ -1422,6 +1427,7 @@ ...@@ -1422,6 +1427,7 @@
(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
(V4HI "V8HI") (V8HI "V4HI") (V4HI "V8HI") (V8HI "V4HI")
(V8BF "V4BF") (V4BF "V8BF")
(V2SI "V4SI") (V4SI "V2SI") (V2SI "V4SI") (V4SI "V2SI")
(DI "V2DI") (V2DI "DI") (DI "V2DI") (V2DI "DI")
(V2SF "V4SF") (V4SF "V2SF") (V2SF "V4SF") (V4SF "V2SF")
...@@ -1434,6 +1440,7 @@ ...@@ -1434,6 +1440,7 @@
(DI "to_128") (V2DI "to_64") (DI "to_128") (V2DI "to_64")
(V4HF "to_128") (V8HF "to_64") (V4HF "to_128") (V8HF "to_64")
(V2SF "to_128") (V4SF "to_64") (V2SF "to_128") (V4SF "to_64")
(V4BF "to_128") (V8BF "to_64")
(DF "to_128") (V2DF "to_64")]) (DF "to_128") (V2DF "to_64")])
;; For certain vector-by-element multiplication instructions we must ;; For certain vector-by-element multiplication instructions we must
...@@ -1467,6 +1474,7 @@ ...@@ -1467,6 +1474,7 @@
;; Defined to '_q' for 128-bit types. ;; Defined to '_q' for 128-bit types.
(define_mode_attr q [(V8QI "") (V16QI "_q") (define_mode_attr q [(V8QI "") (V16QI "_q")
(V4HI "") (V8HI "_q") (V4HI "") (V8HI "_q")
(V4BF "") (V8BF "_q")
(V2SI "") (V4SI "_q") (V2SI "") (V4SI "_q")
(DI "") (V2DI "_q") (DI "") (V2DI "_q")
(V4HF "") (V8HF "_q") (V4HF "") (V8HF "_q")
......
2020-02-25 Mihail Ionescu <mihail.ionescu@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c: New test.
* gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c: New test.
2020-02-25 Dennis Zhang <dennis.zhang@arm.com> 2020-02-25 Dennis Zhang <dennis.zhang@arm.com>
* gcc.target/arm/simd/bf16_dot_1.c: New test. * gcc.target/arm/simd/bf16_dot_1.c: New test.
......
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps" } */
#include <arm_neon.h>
float32x2_t test_vcreate (float32x2_t r, uint64_t a, uint64_t b)
{
bfloat16x4_t _a = vcreate_bf16(a);
bfloat16x4_t _b = vcreate_bf16(b);
return vbfdot_f32 (r, _a, _b);
}
/* { dg-final { scan-assembler {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} } } */
bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a, bfloat16x4_t b)
{
return vset_lane_bf16 (a, b, 3);
}
bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a, bfloat16x8_t b)
{
return vsetq_lane_bf16 (a, b, 7);
}
/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
bfloat16x4_t vdup_test (bfloat16_t a)
{
return vdup_n_bf16 (a);
}
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+.h\\\[0\\\]" } } */
bfloat16x8_t vdupq_test (bfloat16_t a)
{
return vdupq_n_bf16 (a);
}
bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t a)
{
return vdupq_lane_bf16 (a, 1);
}
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+.h\\\[0\\\]" 2 } } */
bfloat16_t test_vget_lane_bf16 (bfloat16x4_t a)
{
return vget_lane_bf16 (a, 1);
}
/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t a)
{
return vdup_lane_bf16 (a, 1);
}
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" } } */
bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t a)
{
return vdup_laneq_bf16 (a, 7);
}
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[7\\\]" } } */
bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t a)
{
return vdupq_laneq_bf16 (a, 5);
}
/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[5\\\]" } } */
bfloat16_t test_vduph_lane_bf16 (bfloat16x4_t a)
{
return vduph_lane_bf16 (a, 3);
}
/* { dg-final { scan-assembler "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[3\\\]" } } */
bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t a)
{
return vgetq_lane_bf16 (a, 7);
}
bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t a)
{
return vduph_laneq_bf16 (a, 7);
}
/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[7\\\]" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment