Commit 43031fbd by Delia Burduv Committed by Kyrylo Tkachov

[AArch32] ACLE intrinsics bfloat16 vmmla and vfma<b/t> for AArch32 AdvSIMD

This patch adds the ARMv8.6 ACLE intrinsics for vmmla, vfmab and vfmat
as part of the BFloat16 extension.
(https://developer.arm.com/docs/101028/latest.)
The intrinsics are declared in arm_neon.h and the RTL patterns are
defined in neon.md.
Two new tests are added to check assembler output and lane indices.

2020-03-05  Delia Burduv  <delia.burduv@arm.com>

	* config/arm/arm_neon.h (vbfmmlaq_f32): New.
	(vbfmlalbq_f32): New.
	(vbfmlaltq_f32): New.
	(vbfmlalbq_lane_f32): New.
	(vbfmlaltq_lane_f32): New.
	(vbfmlalbq_laneq_f32): New.
	(vbfmlaltq_laneq_f32): New.
	* config/arm/arm_neon_builtins.def (vmmla): New.
	(vfmab): New.
	(vfmat): New.
	(vfmab_lane): New.
	(vfmat_lane): New.
	(vfmab_laneq): New.
	(vfmat_laneq): New.
	* config/arm/iterators.md (BF_MA): New int iterator.
	(bt): New int attribute.
	(VQXBF): Copy of VQX with V8BF.
	* config/arm/neon.md (neon_vmmlav8bf): New insn.
	(neon_vfma<bt>v8bf): New insn.
	(neon_vfma<bt>_lanev8bf): New insn.
	(neon_vfma<bt>_laneqv8bf): New expand.
	(neon_vget_high<mode>): Changed iterator to VQXBF.
	* config/arm/unspecs.md (UNSPEC_BFMMLA): New UNSPEC.
	(UNSPEC_BFMAB): New UNSPEC.
	(UNSPEC_BFMAT): New UNSPEC.

2020-03-05  Delia Burduv  <delia.burduv@arm.com>

	* gcc.target/arm/simd/bf16_ma_1.c: New test.
	* gcc.target/arm/simd/bf16_ma_2.c: New test.
	* gcc.target/arm/simd/bf16_mmla_1.c: New test.
parent 7d6b3a78
2020-03-05 Delia Burduv <delia.burduv@arm.com>
* gcc.target/arm/simd/bf16_ma_1.c: New test.
* gcc.target/arm/simd/bf16_ma_2.c: New test.
* gcc.target/arm/simd/bf16_mmla_1.c: New test.
2020-03-05 Jakub Jelinek <jakub@redhat.com> 2020-03-05 Jakub Jelinek <jakub@redhat.com>
PR middle-end/93399 PR middle-end/93399
......
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps -O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "arm_neon.h"
/*
**test_vfmabq_f32:
** ...
** vfmab.bf16 q0, q1, q2
** bx lr
*/
float32x4_t
test_vfmabq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
{
return vbfmlalbq_f32 (r, a, b);
}
/*
**test_vfmatq_f32:
** ...
** vfmat.bf16 q0, q1, q2
** bx lr
*/
float32x4_t
test_vfmatq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
{
return vbfmlaltq_f32 (r, a, b);
}
/*
**test_vfmabq_lane_f32:
** ...
** vfmab.bf16 q0, q1, d4\[0\]
** bx lr
*/
float32x4_t
test_vfmabq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
{
return vbfmlalbq_lane_f32 (r, a, b, 0);
}
/*
**test_vfmatq_lane_f32:
** ...
** vfmat.bf16 q0, q1, d4\[2\]
** bx lr
*/
float32x4_t
test_vfmatq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
{
return vbfmlaltq_lane_f32 (r, a, b, 2);
}
/*
**test_vfmabq_laneq_f32:
** ...
** vfmab.bf16 q0, q1, d5\[1\]
** bx lr
*/
float32x4_t
test_vfmabq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
{
return vbfmlalbq_laneq_f32 (r, a, b, 5);
}
/*
**test_vfmatq_laneq_f32:
** ...
** vfmat.bf16 q0, q1, d5\[3\]
** bx lr
*/
float32x4_t
test_vfmatq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
{
return vbfmlaltq_laneq_f32 (r, a, b, 7);
}
/* { dg-do compile { target { arm*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
#include "arm_neon.h"
/* Test lane index limits for vfmabq_lane_f32 */
float32x4_t
test_vfmabq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
{
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
return vbfmlalbq_lane_f32 (r, a, b, -1);
}
float32x4_t
test_vfmabq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
{
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
return vbfmlalbq_lane_f32 (r, a, b, 4);
}
/* Test lane index limits for vfmatq_lane_f32 */
float32x4_t
test_vfmatq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
{
/* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */
return vbfmlaltq_lane_f32 (r, a, b, -2);
}
float32x4_t
test_vfmatq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
{
/* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */
return vbfmlaltq_lane_f32 (r, a, b, 5);
}
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
/* { dg-add-options arm_v8_2a_bf16_neon } */
/* { dg-additional-options "-save-temps -O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_neon.h>
/*
**test_vmmlaq_f32:
** ...
** vmmla.bf16 q0, q1, q2
** bx lr
*/
float32x4_t
test_vmmlaq_f32 (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
{
return vbfmmlaq_f32 (r, x, y);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment