Commit f348846e by Stam Markianos-Wright

[GCC][PATCH][ARM]Add ACLE intrinsics for dot product (vusdot - vector,…

[GCC][PATCH][ARM]Add ACLE intrinsics for dot product (vusdot - vector, v<us/su>dot - by element) for AArch32 AdvSIMD ARMv8.6 Extension

This patch adds the ARMv8.6 Extension ACLE intrinsics for dot product
operations (vector/by element) to the ARM back-end.

These are:
usdot (vector), <us/su>dot (by element).

The functions are optional from ARMv8.2-a as -march=armv8.2-a+i8mm and
for ARM they remain optional after as of ARMv8.6-a.

The functions are declared in arm_neon.h, RTL patterns are defined to
generate assembler and tests are added to verify and perform adequate checks.

Regression testing on arm-none-eabi passed successfully.

gcc/ChangeLog:

2020-02-11  Stam Markianos-Wright  <stam.markianos-wright@arm.com>

	* config/arm/arm-builtins.c (enum arm_type_qualifiers):
	(USTERNOP_QUALIFIERS): New define.
	(USMAC_LANE_QUADTUP_QUALIFIERS): New define.
	(SUMAC_LANE_QUADTUP_QUALIFIERS): New define.
	(arm_expand_builtin_args): Add case ARG_BUILTIN_LANE_QUADTUP_INDEX.
	(arm_expand_builtin_1): Add qualifier_lane_quadtup_index.
	* config/arm/arm_neon.h (vusdot_s32): New.
	(vusdot_lane_s32): New.
	(vusdotq_lane_s32): New.
	(vsudot_lane_s32): New.
	(vsudotq_lane_s32): New.
	* config/arm/arm_neon_builtins.def (usdot, usdot_lane,sudot_lane): New.
	* config/arm/iterators.md (DOTPROD_I8MM): New.
	(sup, opsuffix): Add <us/su>.
	* config/arm/neon.md (neon_usdot, <us/su>dot_lane: New.
	* config/arm/unspecs.md (UNSPEC_DOT_US, UNSPEC_DOT_SU): New.

gcc/testsuite/ChangeLog:

2020-02-11  Stam Markianos-Wright  <stam.markianos-wright@arm.com>

	* gcc.target/arm/simd/vdot-2-1.c: New test.
	* gcc.target/arm/simd/vdot-2-2.c: New test.
	* gcc.target/arm/simd/vdot-2-3.c: New test.
	* gcc.target/arm/simd/vdot-2-4.c: New test.
parent 667afe5a
2020-02-11 Stam Markianos-Wright <stam.markianos-wright@arm.com>
* config/arm/arm-builtins.c (enum arm_type_qualifiers):
(USTERNOP_QUALIFIERS): New define.
(USMAC_LANE_QUADTUP_QUALIFIERS): New define.
(SUMAC_LANE_QUADTUP_QUALIFIERS): New define.
(arm_expand_builtin_args): Add case ARG_BUILTIN_LANE_QUADTUP_INDEX.
(arm_expand_builtin_1): Add qualifier_lane_quadtup_index.
* config/arm/arm_neon.h (vusdot_s32): New.
(vusdot_lane_s32): New.
(vusdotq_lane_s32): New.
(vsudot_lane_s32): New.
(vsudotq_lane_s32): New.
* config/arm/arm_neon_builtins.def (usdot, usdot_lane,sudot_lane): New.
* config/arm/iterators.md (DOTPROD_I8MM): New.
(sup, opsuffix): Add <us/su>.
* config/arm/neon.md (neon_usdot, <us/su>dot_lane: New.
* config/arm/unspecs.md (UNSPEC_DOT_US, UNSPEC_DOT_SU): New.
2020-02-11 Richard Biener <rguenther@suse.de> 2020-02-11 Richard Biener <rguenther@suse.de>
PR tree-optimization/93661 PR tree-optimization/93661
......
...@@ -86,7 +86,10 @@ enum arm_type_qualifiers ...@@ -86,7 +86,10 @@ enum arm_type_qualifiers
qualifier_const_void_pointer = 0x802, qualifier_const_void_pointer = 0x802,
/* Lane indices selected in pairs - must be within range of previous /* Lane indices selected in pairs - must be within range of previous
argument = a vector. */ argument = a vector. */
qualifier_lane_pair_index = 0x1000 qualifier_lane_pair_index = 0x1000,
/* Lane indices selected in quadtuplets - must be within range of previous
argument = a vector. */
qualifier_lane_quadtup_index = 0x2000
}; };
/* The qualifier_internal allows generation of a unary builtin from /* The qualifier_internal allows generation of a unary builtin from
...@@ -122,6 +125,13 @@ arm_unsigned_uternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] ...@@ -122,6 +125,13 @@ arm_unsigned_uternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
qualifier_unsigned }; qualifier_unsigned };
#define UTERNOP_QUALIFIERS (arm_unsigned_uternop_qualifiers) #define UTERNOP_QUALIFIERS (arm_unsigned_uternop_qualifiers)
/* T (T, unsigned T, T). */
static enum arm_type_qualifiers
arm_usternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned,
qualifier_none };
#define USTERNOP_QUALIFIERS (arm_usternop_qualifiers)
/* T (T, immediate). */ /* T (T, immediate). */
static enum arm_type_qualifiers static enum arm_type_qualifiers
arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
...@@ -176,6 +186,20 @@ arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ...@@ -176,6 +186,20 @@ arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
qualifier_unsigned, qualifier_lane_index }; qualifier_unsigned, qualifier_lane_index };
#define UMAC_LANE_QUALIFIERS (arm_umac_lane_qualifiers) #define UMAC_LANE_QUALIFIERS (arm_umac_lane_qualifiers)
/* T (T, unsigned T, T, lane index). */
static enum arm_type_qualifiers
arm_usmac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned,
qualifier_none, qualifier_lane_quadtup_index };
#define USMAC_LANE_QUADTUP_QUALIFIERS (arm_usmac_lane_quadtup_qualifiers)
/* T (T, T, unsigend T, lane index). */
static enum arm_type_qualifiers
arm_sumac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_none,
qualifier_unsigned, qualifier_lane_quadtup_index };
#define SUMAC_LANE_QUADTUP_QUALIFIERS (arm_sumac_lane_quadtup_qualifiers)
/* T (T, T, immediate). */ /* T (T, T, immediate). */
static enum arm_type_qualifiers static enum arm_type_qualifiers
arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
...@@ -2177,6 +2201,7 @@ typedef enum { ...@@ -2177,6 +2201,7 @@ typedef enum {
ARG_BUILTIN_LANE_INDEX, ARG_BUILTIN_LANE_INDEX,
ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX, ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX,
ARG_BUILTIN_LANE_PAIR_INDEX, ARG_BUILTIN_LANE_PAIR_INDEX,
ARG_BUILTIN_LANE_QUADTUP_INDEX,
ARG_BUILTIN_NEON_MEMORY, ARG_BUILTIN_NEON_MEMORY,
ARG_BUILTIN_MEMORY, ARG_BUILTIN_MEMORY,
ARG_BUILTIN_STOP ARG_BUILTIN_STOP
...@@ -2325,11 +2350,24 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode, ...@@ -2325,11 +2350,24 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
if (CONST_INT_P (op[argc])) if (CONST_INT_P (op[argc]))
{ {
machine_mode vmode = mode[argc - 1]; machine_mode vmode = mode[argc - 1];
neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode) / 2, exp); neon_lane_bounds (op[argc], 0,
GET_MODE_NUNITS (vmode) / 2, exp);
}
/* If the lane index isn't a constant then error out. */
goto constant_arg;
case ARG_BUILTIN_LANE_QUADTUP_INDEX:
/* Previous argument must be a vector, which this indexes. */
gcc_assert (argc > 0);
if (CONST_INT_P (op[argc]))
{
machine_mode vmode = mode[argc - 1];
neon_lane_bounds (op[argc], 0,
GET_MODE_NUNITS (vmode) / 4, exp);
} }
/* If the lane index isn't a constant then the next /* If the lane index isn't a constant then error out. */
case will error. */ goto constant_arg;
/* Fall through. */
case ARG_BUILTIN_CONSTANT: case ARG_BUILTIN_CONSTANT:
constant_arg: constant_arg:
if (!(*insn_data[icode].operand[opno].predicate) if (!(*insn_data[icode].operand[opno].predicate)
...@@ -2493,6 +2531,8 @@ arm_expand_builtin_1 (int fcode, tree exp, rtx target, ...@@ -2493,6 +2531,8 @@ arm_expand_builtin_1 (int fcode, tree exp, rtx target,
args[k] = ARG_BUILTIN_LANE_INDEX; args[k] = ARG_BUILTIN_LANE_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
args[k] = ARG_BUILTIN_LANE_PAIR_INDEX; args[k] = ARG_BUILTIN_LANE_PAIR_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
args[k] = ARG_BUILTIN_LANE_QUADTUP_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
args[k] = ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX; args[k] = ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_immediate) else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
......
...@@ -18742,6 +18742,52 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, ...@@ -18742,6 +18742,52 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index); return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index);
} }
/* AdvSIMD Matrix Multiply-Accumulate and Dot Product intrinsics. */
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+i8mm")
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
{
return __builtin_neon_usdotv8qi_ssus (__r, __a, __b);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a,
int8x8_t __b, const int __index)
{
return __builtin_neon_usdot_lanev8qi_ssuss (__r, __a, __b, __index);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a,
int8x8_t __b, const int __index)
{
return __builtin_neon_usdot_lanev16qi_ssuss (__r, __a, __b, __index);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsudot_lane_s32 (int32x2_t __r, int8x8_t __a,
uint8x8_t __b, const int __index)
{
return __builtin_neon_sudot_lanev8qi_sssus (__r, __a, __b, __index);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a,
uint8x8_t __b, const int __index)
{
return __builtin_neon_sudot_lanev16qi_sssus (__r, __a, __b, __index);
}
#pragma GCC pop_options
#pragma GCC pop_options #pragma GCC pop_options
#endif #endif
......
...@@ -352,6 +352,10 @@ VAR2 (UTERNOP, udot, v8qi, v16qi) ...@@ -352,6 +352,10 @@ VAR2 (UTERNOP, udot, v8qi, v16qi)
VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi) VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi) VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
VAR1 (USTERNOP, usdot, v8qi)
VAR2 (USMAC_LANE_QUADTUP, usdot_lane, v8qi, v16qi)
VAR2 (SUMAC_LANE_QUADTUP, sudot_lane, v8qi, v16qi)
VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf) VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf)
VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf) VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf)
VAR4 (TERNOP, vcmla0, v2sf, v4sf, v4hf, v8hf) VAR4 (TERNOP, vcmla0, v2sf, v4sf, v4hf, v8hf)
......
...@@ -480,6 +480,8 @@ ...@@ -480,6 +480,8 @@
(define_int_iterator DOTPROD [UNSPEC_DOT_S UNSPEC_DOT_U]) (define_int_iterator DOTPROD [UNSPEC_DOT_S UNSPEC_DOT_U])
(define_int_iterator DOTPROD_I8MM [UNSPEC_DOT_US UNSPEC_DOT_SU])
(define_int_iterator VFMLHALVES [UNSPEC_VFML_LO UNSPEC_VFML_HI]) (define_int_iterator VFMLHALVES [UNSPEC_VFML_LO UNSPEC_VFML_HI])
(define_int_iterator VCADD [UNSPEC_VCADD90 UNSPEC_VCADD270]) (define_int_iterator VCADD [UNSPEC_VCADD90 UNSPEC_VCADD270])
...@@ -938,6 +940,7 @@ ...@@ -938,6 +940,7 @@
(UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u") (UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u")
(UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u") (UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u")
(UNSPEC_DOT_S "s") (UNSPEC_DOT_U "u") (UNSPEC_DOT_S "s") (UNSPEC_DOT_U "u")
(UNSPEC_DOT_US "us") (UNSPEC_DOT_SU "su")
(UNSPEC_SSAT16 "s") (UNSPEC_USAT16 "u") (UNSPEC_SSAT16 "s") (UNSPEC_USAT16 "u")
]) ])
...@@ -1169,6 +1172,9 @@ ...@@ -1169,6 +1172,9 @@
(define_int_attr MRRC [(VUNSPEC_MRRC "MRRC") (VUNSPEC_MRRC2 "MRRC2")]) (define_int_attr MRRC [(VUNSPEC_MRRC "MRRC") (VUNSPEC_MRRC2 "MRRC2")])
(define_int_attr opsuffix [(UNSPEC_DOT_S "s8") (define_int_attr opsuffix [(UNSPEC_DOT_S "s8")
(UNSPEC_DOT_U "u8")]) (UNSPEC_DOT_U "u8")
(UNSPEC_DOT_US "s8")
(UNSPEC_DOT_SU "u8")
])
(define_int_attr smlaw_op [(UNSPEC_SMLAWB "smlawb") (UNSPEC_SMLAWT "smlawt")]) (define_int_attr smlaw_op [(UNSPEC_SMLAWB "smlawb") (UNSPEC_SMLAWT "smlawt")])
...@@ -3279,6 +3279,20 @@ ...@@ -3279,6 +3279,20 @@
[(set_attr "type" "neon_dot<q>")] [(set_attr "type" "neon_dot<q>")]
) )
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "neon_usdot<vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
(plus:VCVTI
(unspec:VCVTI
[(match_operand:<VSI2QI> 2 "register_operand" "w")
(match_operand:<VSI2QI> 3 "register_operand" "w")]
UNSPEC_DOT_US)
(match_operand:VCVTI 1 "register_operand" "0")))]
"TARGET_I8MM"
"vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set_attr "type" "neon_dot<q>")]
)
;; These instructions map to the __builtins for the Dot Product ;; These instructions map to the __builtins for the Dot Product
;; indexed operations. ;; indexed operations.
(define_insn "neon_<sup>dot_lane<vsi2qi>" (define_insn "neon_<sup>dot_lane<vsi2qi>"
...@@ -3298,6 +3312,25 @@ ...@@ -3298,6 +3312,25 @@
[(set_attr "type" "neon_dot<q>")] [(set_attr "type" "neon_dot<q>")]
) )
;; These instructions map to the __builtins for the Dot Product
;; indexed operations in the v8.6 I8MM extension.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
(plus:VCVTI
(unspec:VCVTI
[(match_operand:<VSI2QI> 2 "register_operand" "w")
(match_operand:V8QI 3 "register_operand" "t")
(match_operand:SI 4 "immediate_operand" "i")]
DOTPROD_I8MM)
(match_operand:VCVTI 1 "register_operand" "0")))]
"TARGET_I8MM"
{
operands[4] = GEN_INT (INTVAL (operands[4]));
return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
}
[(set_attr "type" "neon_dot<q>")]
)
;; These expands map to the Dot Product optab the vectorizer checks for. ;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an ;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register. ;; accumulation into the provided register.
......
...@@ -493,6 +493,8 @@ ...@@ -493,6 +493,8 @@
UNSPEC_VRNDX UNSPEC_VRNDX
UNSPEC_DOT_S UNSPEC_DOT_S
UNSPEC_DOT_U UNSPEC_DOT_U
UNSPEC_DOT_US
UNSPEC_DOT_SU
UNSPEC_VFML_LO UNSPEC_VFML_LO
UNSPEC_VFML_HI UNSPEC_VFML_HI
UNSPEC_VCADD90 UNSPEC_VCADD90
......
2020-02-11 Stam Markianos-Wright <stam.markianos-wright@arm.com>
* gcc.target/arm/simd/vdot-2-1.c: New test.
* gcc.target/arm/simd/vdot-2-2.c: New test.
* gcc.target/arm/simd/vdot-2-3.c: New test.
* gcc.target/arm/simd/vdot-2-4.c: New test.
2020-02-11 Richard Biener <rguenther@suse.de> 2020-02-11 Richard Biener <rguenther@suse.de>
PR tree-optimization/93661 PR tree-optimization/93661
......
/* { dg-do assemble { target { arm*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "-O -save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_neon.h>
/* Unsigned-Signed Dot Product instructions. */
/*
**usfoo:
** ...
** vusdot\.s8 d0, d1, d2
** bx lr
*/
int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**usfoo_lane:
** ...
** vusdot\.s8 d0, d1, d2\[0\]
** bx lr
*/
int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_lane_s32 (r, x, y, 0);
}
/*
**usfooq_lane:
** ...
** vusdot\.s8 q0, q1, d4\[1\]
** bx lr
*/
int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
{
return vusdotq_lane_s32 (r, x, y, 1);
}
/* Signed-Unsigned Dot Product instructions. */
/*
**sfoo_lane:
** ...
** vsudot\.u8 d0, d1, d2\[0\]
** bx lr
*/
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
{
return vsudot_lane_s32 (r, x, y, 0);
}
/*
**sfooq_lane:
** ...
** vsudot\.u8 q0, q1, d4\[1\]
** bx lr
*/
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
{
return vsudotq_lane_s32 (r, x, y, 1);
}
/*
**usfoo_untied:
** ...
** vusdot\.s8 d1, d2, d3
** vmov d0, d1 @ v2si
** bx lr
*/
int32x2_t usfoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**usfoo_lane_untied:
** ...
** vusdot.s8 d1, d2, d3\[0\]
** vmov d0, d1 @ v2si
** bx lr
*/
int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_lane_s32 (r, x, y, 0);
}
/* { dg-do assemble { target { arm*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "-O -save-temps -mbig-endian" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_neon.h>
/* Unsigned-Signed Dot Product instructions. */
/*
**usfoo:
** ...
** vusdot\.s8 d0, d1, d2
** bx lr
*/
int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**usfoo_lane:
** ...
** vusdot\.s8 d0, d1, d2\[0\]
** bx lr
*/
int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_lane_s32 (r, x, y, 0);
}
/*
**usfooq_lane:
** ...
** vusdot\.s8 q0, q1, d4\[1\]
** bx lr
*/
int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
{
return vusdotq_lane_s32 (r, x, y, 1);
}
/* Signed-Unsigned Dot Product instructions. */
/*
**sfoo_lane:
** ...
** vsudot\.u8 d0, d1, d2\[0\]
** bx lr
*/
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
{
return vsudot_lane_s32 (r, x, y, 0);
}
/*
**sfooq_lane:
** ...
** vsudot\.u8 q0, q1, d4\[1\]
** bx lr
*/
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
{
return vsudotq_lane_s32 (r, x, y, 1);
}
/*
**usfoo_untied:
** ...
** vusdot\.s8 d1, d2, d3
** vmov d0, d1 @ v2si
** bx lr
*/
int32x2_t usfoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**usfoo_lane_untied:
** ...
** vusdot.s8 d1, d2, d3\[0\]
** vmov d0, d1 @ v2si
** bx lr
*/
int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_lane_s32 (r, x, y, 0);
}
/* { dg-do assemble { target { arm*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "--save-temps" } */
#include <arm_neon.h>
/* Unsigned-Signed Dot Product instructions. */
int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
{
/* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
return vusdot_lane_s32 (r, x, y, -1);
}
int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
{
/* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
return vusdotq_lane_s32 (r, x, y, 2);
}
/* { dg-do assemble { target { arm*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "--save-temps" } */
#include <arm_neon.h>
/* Signed-Unsigned Dot Product instructions. */
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
{
/* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
return vsudot_lane_s32 (r, x, y, -1);
}
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
{
/* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
return vsudotq_lane_s32 (r, x, y, 2);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment