Commit 8c197c85 by Stam Markianos-Wright

[GCC][PATCH][AArch64] Add ACLE intrinsics for dot product (usdot - vector, <us/su>dot - by element) for AArch64 AdvSIMD ARMv8.6 Extension

gcc/ChangeLog:

2020-01-16  Stam Markianos-Wright  <stam.markianos-wright@arm.com>

	* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
	New qualifier_lane_quadtup_index, TYPES_TERNOP_SSUS,
	TYPES_QUADOPSSUS_LANE_QUADTUP, TYPES_QUADOPSSSU_LANE_QUADTUP.
	(aarch64_simd_expand_args): Add case SIMD_ARG_LANE_QUADTUP_INDEX.
	(aarch64_simd_expand_builtin): Add qualifier_lane_quadtup_index.
	* config/aarch64/aarch64-simd-builtins.def (usdot, usdot_lane,
	usdot_laneq, sudot_lane, sudot_laneq): New.
	* config/aarch64/aarch64-simd.md (aarch64_usdot<vsi2qi>): New.
	(aarch64_<sur>dot_lane): New.
	* config/aarch64/arm_neon.h (vusdot_s32): New.
	(vusdotq_s32): New.
	(vusdot_lane_s32): New.
	(vusdot_laneq_s32): New.
	(vusdotq_lane_s32): New.
	(vusdotq_laneq_s32): New.
	(vsudot_lane_s32): New.
	(vsudot_laneq_s32): New.
	(vsudotq_lane_s32): New.
	(vsudotq_laneq_s32): New.
	* config/aarch64/iterators.md (DOTPROD_I8MM): New iterator.
	(UNSPEC_USDOT, UNSPEC_SUDOT): New unspecs.

gcc/testsuite/ChangeLog:

2020-01-16  Stam Markianos-Wright  <stam.markianos-wright@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c: New test.
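
For reference, a minimal usage sketch of the new intrinsics (not part of the
patch; it assumes a compiler with this patch and -march=armv8.2-a+i8mm):

#include <arm_neon.h>

/* Each 32-bit lane of the accumulator gains the sum of four
   unsigned-by-signed byte products.  */
int32x2_t
mixed_dot (int32x2_t acc, uint8x8_t u, int8x8_t s)
{
  acc = vusdot_s32 (acc, u, s);           /* vector form */
  return vusdot_lane_s32 (acc, u, s, 1);  /* by element: quadtuplet 1 of s */
}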
gcc/config/aarch64/aarch64-builtins.c
@@ -110,6 +110,9 @@ enum aarch64_type_qualifiers
/* Lane indices selected in pairs. - must be in range, and flipped for
bigendian. */
qualifier_lane_pair_index = 0x800,
/* Lane indices selected in quadtuplets. - must be in range, and flipped for
bigendian. */
qualifier_lane_quadtup_index = 0x1000,
};
typedef struct
@@ -176,6 +179,10 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned,
qualifier_unsigned, qualifier_immediate };
#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
static enum aarch64_type_qualifiers
@@ -195,6 +202,19 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned,
qualifier_none, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSUS_LANE_QUADTUP \
(aarch64_types_quadopssus_lane_quadtup_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_none,
qualifier_unsigned, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSSU_LANE_QUADTUP \
(aarch64_types_quadopsssu_lane_quadtup_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
qualifier_unsigned, qualifier_immediate };
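
The qualifier lists above also determine the type-suffix on the generated
builtin names that arm_neon.h calls below; for illustration, signatures
inferred from the qualifier ordering (return type first):

/* TYPES_TERNOP_SSUS: signed result and accumulator, unsigned a, signed b.  */
int32x2_t __builtin_aarch64_usdotv8qi_ssus (int32x2_t, uint8x8_t, int8x8_t);
/* TYPES_QUADOPSSUS_LANE_QUADTUP appends a quadtuplet lane index.  */
int32x2_t __builtin_aarch64_usdot_lanev8qi_ssuss (int32x2_t, uint8x8_t,
						  int8x8_t, const int);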
@@ -1288,6 +1308,7 @@ typedef enum
SIMD_ARG_LANE_INDEX,
SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
SIMD_ARG_LANE_PAIR_INDEX,
SIMD_ARG_LANE_QUADTUP_INDEX,
SIMD_ARG_STOP
} builtin_simd_arg;
@@ -1377,9 +1398,25 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
SImode);
}
/* If the lane index isn't a constant then error out.  */
goto constant_arg;
case SIMD_ARG_LANE_QUADTUP_INDEX:
/* Must be a previous operand into which this is an index and
index is restricted to nunits / 4. */
gcc_assert (opc > 0);
if (CONST_INT_P (op[opc]))
{
machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
unsigned int nunits
= GET_MODE_NUNITS (vmode).to_constant ();
aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */
int lane = INTVAL (op[opc]);
op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
SImode);
}
/* If the lane index isn't a constant then error out. */
goto constant_arg;
case SIMD_ARG_CONSTANT:
constant_arg:
if (!(*insn_data[icode].operand[opc].predicate)
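
The ENDIAN_LANE_N adjustment in both lane cases keeps GCC-vector-extension
lane numbering in the RTL; a scalar sketch of the existing macro's behaviour
(an illustration, not patch code):

/* With nunits / 4 quadtuplet lanes, big-endian targets count lanes from
   the other end: an 8-byte vector has 2 quadtuplets, so lanes 0 and 1
   swap when big_endian is true.  */
static int
endian_lane_n (int lanes_per_vector, int lane, int big_endian)
{
  return big_endian ? lanes_per_vector - 1 - lane : lane;
}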
@@ -1492,6 +1529,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
args[k] = SIMD_ARG_LANE_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
args[k] = SIMD_ARG_LANE_PAIR_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
gcc/config/aarch64/aarch64-simd-builtins.def
@@ -212,10 +212,15 @@
/* Implemented by aarch64_<sur><dotprod>{_lane}{q}<dot_mode>. */
BUILTIN_VB (TERNOP, sdot, 0)
BUILTIN_VB (TERNOPU, udot, 0)
BUILTIN_VB (TERNOP_SSUS, usdot, 0)
BUILTIN_VB (QUADOP_LANE, sdot_lane, 0)
BUILTIN_VB (QUADOPU_LANE, udot_lane, 0)
BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0)
BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0)
BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_lane, 0)
BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_laneq, 0)
BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_lane, 0)
BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_laneq, 0)
/* Implemented by aarch64_fcadd<rot><mode>. */
BUILTIN_VHSDF (BINOP, fcadd90, 0)
gcc/config/aarch64/aarch64-simd.md
@@ -506,6 +506,20 @@
[(set_attr "type" "neon_dot<q>")]
)
;; These instructions map to the __builtins for the armv8.6a I8MM usdot
;; (vector) Dot Product operation.
(define_insn "aarch64_usdot<vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
(match_operand:<VSI2QI> 3 "register_operand" "w")]
UNSPEC_USDOT)
(match_operand:VS 1 "register_operand" "0")))]
"TARGET_I8MM"
"usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
[(set_attr "type" "neon_dot<q>")]
)
;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
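
For clarity, the operation the aarch64_usdot pattern above emits, written as
a scalar model (an illustration of the usdot semantics, not patch code):

#include <stdint.h>

/* usdot acc.2s, u.8b, s.8b: each 32-bit lane i accumulates four
   unsigned-by-signed byte products from its own quadtuplet.  */
static void
usdot_model (int32_t acc[2], const uint8_t u[8], const int8_t s[8])
{
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 4; j++)
      acc[i] += (int32_t) u[4 * i + j] * (int32_t) s[4 * i + j];
}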
@@ -573,6 +587,26 @@
[(set_attr "type" "neon_dot<q>")]
)
;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
(match_operand:VB 3 "register_operand" "w")
(match_operand:SI 4 "immediate_operand" "i")]
DOTPROD_I8MM)
(match_operand:VS 1 "register_operand" "0")))]
"TARGET_I8MM"
{
int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
int lane = INTVAL (operands[4]);
operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
}
[(set_attr "type" "neon_dot<VS:q>")]
)
(define_expand "copysign<mode>3"
[(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
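
And the by-element pattern above, where the (endian-adjusted) operand 4
selects one quadtuplet of the indexed operand that is reused for every
accumulator lane (again only an illustration):

#include <stdint.h>

/* usdot acc.2s, u.8b, s.4b[lane]: the same four signed bytes, chosen by
   lane (0 <= lane < 8 / 4), multiply each quadtuplet of u.  */
static void
usdot_lane_model (int32_t acc[2], const uint8_t u[8], const int8_t s[8],
		  int lane)
{
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 4; j++)
      acc[i] += (int32_t) u[4 * i + j] * (int32_t) s[4 * lane + j];
}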
gcc/config/aarch64/arm_neon.h
@@ -34611,6 +34611,89 @@ vrnd64xq_f64 (float64x2_t __a)
#include "arm_bf16.h"
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+i8mm")
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
{
return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
{
return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b, const int __index)
{
return __builtin_aarch64_usdot_lanev8qi_ssuss (__r, __a, __b, __index);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a, int8x16_t __b,
const int __index)
{
return __builtin_aarch64_usdot_laneqv8qi_ssuss (__r, __a, __b, __index);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a, int8x8_t __b,
const int __index)
{
return __builtin_aarch64_usdot_lanev16qi_ssuss (__r, __a, __b, __index);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b,
const int __index)
{
return __builtin_aarch64_usdot_laneqv16qi_ssuss (__r, __a, __b, __index);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, uint8x8_t __b, const int __index)
{
return __builtin_aarch64_sudot_lanev8qi_sssus (__r, __a, __b, __index);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a, uint8x16_t __b,
const int __index)
{
return __builtin_aarch64_sudot_laneqv8qi_sssus (__r, __a, __b, __index);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, uint8x8_t __b,
const int __index)
{
return __builtin_aarch64_sudot_lanev16qi_sssus (__r, __a, __b, __index);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b,
const int __index)
{
return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index);
}
#pragma GCC pop_options
#undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any
gcc/config/aarch64/iterators.md
@@ -799,6 +799,8 @@
UNSPEC_USUBLT ; Used in aarch64-sve2.md.
UNSPEC_USUBWB ; Used in aarch64-sve2.md.
UNSPEC_USUBWT ; Used in aarch64-sve2.md.
UNSPEC_USDOT ; Used in aarch64-simd.md.
UNSPEC_SUDOT ; Used in aarch64-simd.md.
])
;; ------------------------------------------------------------------
@@ -1463,6 +1465,8 @@
(define_mode_attr f16quad [(V2SF "") (V4SF "q")])
(define_mode_attr isquadop [(V8QI "") (V16QI "q")])
(define_code_attr f16mac [(plus "a") (minus "s")])
;; Map smax to smin and umax to umin.
@@ -2045,6 +2049,8 @@
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT])
(define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
UNSPEC_SUBHN UNSPEC_RSUBHN])
@@ -2738,6 +2744,7 @@
(UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr")
(UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s")
(UNSPEC_SDOT "s") (UNSPEC_UDOT "u")
(UNSPEC_USDOT "us") (UNSPEC_SUDOT "su")
])
(define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r")
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "-save-temps" } */
/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
#include <arm_neon.h>
/* Unsigned-Signed Dot Product instructions. */
/*
**ufoo:
** usdot v0\.2s, v1\.8b, v2\.8b
** ret
*/
int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**ufooq:
** usdot v0\.4s, v1\.16b, v2\.16b
** ret
*/
int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y)
{
return vusdotq_s32 (r, x, y);
}
/*
**ufoo_lane:
** usdot v0\.2s, v1\.8b, v2\.4b\[0\]
** ret
*/
int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_lane_s32 (r, x, y, 0);
}
/*
**ufoo_laneq:
** usdot v0\.2s, v1\.8b, v2\.4b\[2\]
** ret
*/
int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
{
return vusdot_laneq_s32 (r, x, y, 2);
}
/*
**ufooq_lane:
** usdot v0\.4s, v1\.16b, v2\.4b\[1\]
** ret
*/
int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
{
return vusdotq_lane_s32 (r, x, y, 1);
}
/*
**ufooq_laneq:
** usdot v0\.4s, v1\.16b, v2\.4b\[3\]
** ret
*/
int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
{
return vusdotq_laneq_s32 (r, x, y, 3);
}
/* Signed-Unsigned Dot Product instructions. */
/*
**sfoo_lane:
** sudot v0\.2s, v1\.8b, v2\.4b\[0\]
** ret
*/
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
{
return vsudot_lane_s32 (r, x, y, 0);
}
/*
**sfoo_laneq:
** sudot v0\.2s, v1\.8b, v2\.4b\[2\]
** ret
*/
int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
{
return vsudot_laneq_s32 (r, x, y, 2);
}
/*
**sfooq_lane:
** sudot v0\.4s, v1\.16b, v2\.4b\[1\]
** ret
*/
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
{
return vsudotq_lane_s32 (r, x, y, 1);
}
/*
**sfooq_laneq:
** sudot v0\.4s, v1\.16b, v2\.4b\[3\]
** ret
*/
int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
{
return vsudotq_laneq_s32 (r, x, y, 3);
}
/*
**ufoo_untied:
** mov v0\.8b, v1\.8b
** usdot v0\.2s, v2\.8b, v3\.8b
** ret
*/
int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**ufooq_laneq_untied:
** mov v0\.16b, v1\.16b
** usdot v0\.4s, v2\.16b, v3\.4b\[3\]
** ret
*/
int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y)
{
return vusdotq_laneq_s32 (r, x, y, 3);
}
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "-mbig-endian -save-temps" } */
/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
#include <arm_neon.h>
/* Unsigned-Signed Dot Product instructions. */
/*
**ufoo:
** usdot v0\.2s, v1\.8b, v2\.8b
** ret
*/
int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**ufooq:
** usdot v0\.4s, v1\.16b, v2\.16b
** ret
*/
int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y)
{
return vusdotq_s32 (r, x, y);
}
/*
**ufoo_lane:
** usdot v0\.2s, v1\.8b, v2\.4b\[0\]
** ret
*/
int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_lane_s32 (r, x, y, 0);
}
/*
**ufoo_laneq:
** usdot v0\.2s, v1\.8b, v2\.4b\[2\]
** ret
*/
int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
{
return vusdot_laneq_s32 (r, x, y, 2);
}
/*
**ufooq_lane:
** usdot v0\.4s, v1\.16b, v2\.4b\[1\]
** ret
*/
int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
{
return vusdotq_lane_s32 (r, x, y, 1);
}
/*
**ufooq_laneq:
** usdot v0\.4s, v1\.16b, v2\.4b\[3\]
** ret
*/
int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
{
return vusdotq_laneq_s32 (r, x, y, 3);
}
/* Signed-Unsigned Dot Product instructions. */
/*
**sfoo_lane:
** sudot v0\.2s, v1\.8b, v2\.4b\[0\]
** ret
*/
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
{
return vsudot_lane_s32 (r, x, y, 0);
}
/*
**sfoo_laneq:
** sudot v0\.2s, v1\.8b, v2\.4b\[2\]
** ret
*/
int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
{
return vsudot_laneq_s32 (r, x, y, 2);
}
/*
**sfooq_lane:
** sudot v0\.4s, v1\.16b, v2\.4b\[1\]
** ret
*/
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
{
return vsudotq_lane_s32 (r, x, y, 1);
}
/*
**sfooq_laneq:
** sudot v0\.4s, v1\.16b, v2\.4b\[3\]
** ret
*/
int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
{
return vsudotq_laneq_s32 (r, x, y, 3);
}
/*
**ufoo_untied:
** mov v0\.8b, v1\.8b
** usdot v0\.2s, v2\.8b, v3\.8b
** ret
*/
int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
{
return vusdot_s32 (r, x, y);
}
/*
**ufooq_laneq_untied:
** mov v0\.16b, v1\.16b
** usdot v0\.4s, v2\.16b, v3\.4b\[3\]
** ret
*/
int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y)
{
return vusdotq_laneq_s32 (r, x, y, 3);
}
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "--save-temps" } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
#include <arm_neon.h>
int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
{
/* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
return vusdot_lane_s32 (r, x, y, -1);
}
int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
{
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
return vusdot_laneq_s32 (r, x, y, -1);
}
int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
{
/* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
return vusdotq_lane_s32 (r, x, y, 2);
}
int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
{
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
return vusdotq_laneq_s32 (r, x, y, 4);
}
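
The expected ranges follow from the nunits / 4 bound added in
aarch64-builtins.c: the indexed operand of the _lane forms has 8 bytes,
giving 8 / 4 = 2 quadtuplets (lanes 0-1), while the _laneq forms index a
16-byte vector, giving 16 / 4 = 4 quadtuplets (lanes 0-3).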
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "--save-temps" } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
#include <arm_neon.h>
int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
{
/* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
return vsudot_lane_s32 (r, x, y, -1);
}
int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
{
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
return vsudot_laneq_s32 (r, x, y, -1);
}
int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
{
/* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
return vsudotq_lane_s32 (r, x, y, 2);
}
int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
{
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
return vsudotq_laneq_s32 (r, x, y, 4);
}