Commit d71dba7b by Srinath Parvathaneni Committed by Kyrylo Tkachov

[ARM][GCC][3/2x]: MVE intrinsics with binary operands.

This patch supports following MVE ACLE intrinsics with binary operands.

vaddlvq_p_s32, vaddlvq_p_u32, vcmpneq_s8, vcmpneq_s16, vcmpneq_s32, vcmpneq_u8, vcmpneq_u16, vcmpneq_u32, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_u8, vshlq_u16, vshlq_u32.

Please refer to M-profile Vector Extension (MVE) intrinsics [1]  for more details.
[1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics

2020-03-17  Andre Vieira  <andre.simoesdiasvieira@arm.com>
            Mihail Ionescu  <mihail.ionescu@arm.com>
            Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	* config/arm/arm-builtins.c (BINOP_NONE_NONE_UNONE_QUALIFIERS): Define
	qualifier for binary operands.
	(BINOP_UNONE_NONE_NONE_QUALIFIERS): Likewise.
	(BINOP_UNONE_UNONE_NONE_QUALIFIERS): Likewise.
	* config/arm/arm_mve.h (vaddlvq_p_s32): Define macro.
	(vaddlvq_p_u32): Likewise.
	(vcmpneq_s8): Likewise.
	(vcmpneq_s16): Likewise.
	(vcmpneq_s32): Likewise.
	(vcmpneq_u8): Likewise.
	(vcmpneq_u16): Likewise.
	(vcmpneq_u32): Likewise.
	(vshlq_s8): Likewise.
	(vshlq_s16): Likewise.
	(vshlq_s32): Likewise.
	(vshlq_u8): Likewise.
	(vshlq_u16): Likewise.
	(vshlq_u32): Likewise.
	(__arm_vaddlvq_p_s32): Define intrinsic.
	(__arm_vaddlvq_p_u32): Likewise.
	(__arm_vcmpneq_s8): Likewise.
	(__arm_vcmpneq_s16): Likewise.
	(__arm_vcmpneq_s32): Likewise.
	(__arm_vcmpneq_u8): Likewise.
	(__arm_vcmpneq_u16): Likewise.
	(__arm_vcmpneq_u32): Likewise.
	(__arm_vshlq_s8): Likewise.
	(__arm_vshlq_s16): Likewise.
	(__arm_vshlq_s32): Likewise.
	(__arm_vshlq_u8): Likewise.
	(__arm_vshlq_u16): Likewise.
	(__arm_vshlq_u32): Likewise.
	(vaddlvq_p): Define polymorphic variant.
	(vcmpneq): Likewise.
	(vshlq): Likewise.
	* config/arm/arm_mve_builtins.def (BINOP_NONE_NONE_UNONE_QUALIFIERS):
	Use it.
	(BINOP_UNONE_NONE_NONE_QUALIFIERS): Likewise.
	(BINOP_UNONE_UNONE_NONE_QUALIFIERS): Likewise.
	* config/arm/mve.md (mve_vaddlvq_p_<supf>v4si): Define RTL pattern.
	(mve_vcmpneq_<supf><mode>): Likewise.
	(mve_vshlq_<supf><mode>): Likewise.

gcc/testsuite/ChangeLog:

2020-03-17  Andre Vieira  <andre.simoesdiasvieira@arm.com>
            Mihail Ionescu  <mihail.ionescu@arm.com>
            Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	* gcc.target/arm/mve/intrinsics/vaddlvq_p_s32.c: New test.
	* gcc.target/arm/mve/intrinsics/vaddlvq_p_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcmpneq_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcmpneq_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcmpneq_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcmpneq_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcmpneq_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcmpneq_u8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vshlq_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vshlq_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vshlq_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vshlq_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vshlq_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vshlq_u8.c: Likewise.
parent f166a8cd
...@@ -2,6 +2,53 @@ ...@@ -2,6 +2,53 @@
Mihail Ionescu <mihail.ionescu@arm.com> Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com> Srinath Parvathaneni <srinath.parvathaneni@arm.com>
* config/arm/arm-builtins.c (BINOP_NONE_NONE_UNONE_QUALIFIERS): Define
qualifier for binary operands.
(BINOP_UNONE_NONE_NONE_QUALIFIERS): Likewise.
(BINOP_UNONE_UNONE_NONE_QUALIFIERS): Likewise.
* config/arm/arm_mve.h (vaddlvq_p_s32): Define macro.
(vaddlvq_p_u32): Likewise.
(vcmpneq_s8): Likewise.
(vcmpneq_s16): Likewise.
(vcmpneq_s32): Likewise.
(vcmpneq_u8): Likewise.
(vcmpneq_u16): Likewise.
(vcmpneq_u32): Likewise.
(vshlq_s8): Likewise.
(vshlq_s16): Likewise.
(vshlq_s32): Likewise.
(vshlq_u8): Likewise.
(vshlq_u16): Likewise.
(vshlq_u32): Likewise.
(__arm_vaddlvq_p_s32): Define intrinsic.
(__arm_vaddlvq_p_u32): Likewise.
(__arm_vcmpneq_s8): Likewise.
(__arm_vcmpneq_s16): Likewise.
(__arm_vcmpneq_s32): Likewise.
(__arm_vcmpneq_u8): Likewise.
(__arm_vcmpneq_u16): Likewise.
(__arm_vcmpneq_u32): Likewise.
(__arm_vshlq_s8): Likewise.
(__arm_vshlq_s16): Likewise.
(__arm_vshlq_s32): Likewise.
(__arm_vshlq_u8): Likewise.
(__arm_vshlq_u16): Likewise.
(__arm_vshlq_u32): Likewise.
(vaddlvq_p): Define polymorphic variant.
(vcmpneq): Likewise.
(vshlq): Likewise.
* config/arm/arm_mve_builtins.def (BINOP_NONE_NONE_UNONE_QUALIFIERS):
Use it.
(BINOP_UNONE_NONE_NONE_QUALIFIERS): Likewise.
(BINOP_UNONE_UNONE_NONE_QUALIFIERS): Likewise.
* config/arm/mve.md (mve_vaddlvq_p_<supf>v4si): Define RTL pattern.
(mve_vcmpneq_<supf><mode>): Likewise.
(mve_vshlq_<supf><mode>): Likewise.
2020-03-17 Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com>
* config/arm/arm-builtins.c (BINOP_UNONE_UNONE_IMM_QUALIFIERS): Define * config/arm/arm-builtins.c (BINOP_UNONE_UNONE_IMM_QUALIFIERS): Define
qualifier for binary operands. qualifier for binary operands.
(BINOP_UNONE_UNONE_UNONE_QUALIFIERS): Likewise. (BINOP_UNONE_UNONE_UNONE_QUALIFIERS): Likewise.
......
...@@ -415,6 +415,24 @@ arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] ...@@ -415,6 +415,24 @@ arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define BINOP_UNONE_NONE_IMM_QUALIFIERS \ #define BINOP_UNONE_NONE_IMM_QUALIFIERS \
(arm_binop_unone_none_imm_qualifiers) (arm_binop_unone_none_imm_qualifiers)
static enum arm_type_qualifiers
arm_binop_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned };
#define BINOP_NONE_NONE_UNONE_QUALIFIERS \
(arm_binop_none_none_unone_qualifiers)
static enum arm_type_qualifiers
arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_none, qualifier_none };
#define BINOP_UNONE_NONE_NONE_QUALIFIERS \
(arm_binop_unone_none_none_qualifiers)
static enum arm_type_qualifiers
arm_binop_unone_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define BINOP_UNONE_UNONE_NONE_QUALIFIERS \
(arm_binop_unone_unone_none_qualifiers)
/* End of Qualifier for MVE builtins. */ /* End of Qualifier for MVE builtins. */
/* void ([T element type] *, T, immediate). */ /* void ([T element type] *, T, immediate). */
......
...@@ -225,6 +225,20 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; ...@@ -225,6 +225,20 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t;
#define vshrq_n_u8(__a, __imm) __arm_vshrq_n_u8(__a, __imm) #define vshrq_n_u8(__a, __imm) __arm_vshrq_n_u8(__a, __imm)
#define vshrq_n_u16(__a, __imm) __arm_vshrq_n_u16(__a, __imm) #define vshrq_n_u16(__a, __imm) __arm_vshrq_n_u16(__a, __imm)
#define vshrq_n_u32(__a, __imm) __arm_vshrq_n_u32(__a, __imm) #define vshrq_n_u32(__a, __imm) __arm_vshrq_n_u32(__a, __imm)
#define vaddlvq_p_s32(__a, __p) __arm_vaddlvq_p_s32(__a, __p)
#define vaddlvq_p_u32(__a, __p) __arm_vaddlvq_p_u32(__a, __p)
#define vcmpneq_s8(__a, __b) __arm_vcmpneq_s8(__a, __b)
#define vcmpneq_s16(__a, __b) __arm_vcmpneq_s16(__a, __b)
#define vcmpneq_s32(__a, __b) __arm_vcmpneq_s32(__a, __b)
#define vcmpneq_u8(__a, __b) __arm_vcmpneq_u8(__a, __b)
#define vcmpneq_u16(__a, __b) __arm_vcmpneq_u16(__a, __b)
#define vcmpneq_u32(__a, __b) __arm_vcmpneq_u32(__a, __b)
#define vshlq_s8(__a, __b) __arm_vshlq_s8(__a, __b)
#define vshlq_s16(__a, __b) __arm_vshlq_s16(__a, __b)
#define vshlq_s32(__a, __b) __arm_vshlq_s32(__a, __b)
#define vshlq_u8(__a, __b) __arm_vshlq_u8(__a, __b)
#define vshlq_u16(__a, __b) __arm_vshlq_u16(__a, __b)
#define vshlq_u32(__a, __b) __arm_vshlq_u32(__a, __b)
#endif #endif
__extension__ extern __inline void __extension__ extern __inline void
...@@ -868,6 +882,103 @@ __arm_vshrq_n_u32 (uint32x4_t __a, const int __imm) ...@@ -868,6 +882,103 @@ __arm_vshrq_n_u32 (uint32x4_t __a, const int __imm)
{ {
return __builtin_mve_vshrq_n_uv4si (__a, __imm); return __builtin_mve_vshrq_n_uv4si (__a, __imm);
} }
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vaddlvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
{
return __builtin_mve_vaddlvq_p_sv4si (__a, __p);
}
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vaddlvq_p_u32 (uint32x4_t __a, mve_pred16_t __p)
{
return __builtin_mve_vaddlvq_p_uv4si (__a, __p);
}
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmpneq_s8 (int8x16_t __a, int8x16_t __b)
{
return __builtin_mve_vcmpneq_sv16qi (__a, __b);
}
__extension__ extern __inline mve_pred16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmpneq_s16 (int16x8_t __a, int16x8_t __b)
{
return __builtin_mve_vcmpneq_sv8hi (__a, __b);
}
__extension__ extern __inline mve_pred16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmpneq_s32 (int32x4_t __a, int32x4_t __b)
{
return __builtin_mve_vcmpneq_sv4si (__a, __b);
}
__extension__ extern __inline mve_pred16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmpneq_u8 (uint8x16_t __a, uint8x16_t __b)
{
return __builtin_mve_vcmpneq_uv16qi (__a, __b);
}
__extension__ extern __inline mve_pred16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmpneq_u16 (uint16x8_t __a, uint16x8_t __b)
{
return __builtin_mve_vcmpneq_uv8hi (__a, __b);
}
__extension__ extern __inline mve_pred16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmpneq_u32 (uint32x4_t __a, uint32x4_t __b)
{
return __builtin_mve_vcmpneq_uv4si (__a, __b);
}
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
return __builtin_mve_vshlq_sv16qi (__a, __b);
}
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
return __builtin_mve_vshlq_sv8hi (__a, __b);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
return __builtin_mve_vshlq_sv4si (__a, __b);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
return __builtin_mve_vshlq_uv16qi (__a, __b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
return __builtin_mve_vshlq_uv8hi (__a, __b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
return __builtin_mve_vshlq_uv4si (__a, __b);
}
#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
...@@ -1689,6 +1800,27 @@ extern void *__ARM_undef; ...@@ -1689,6 +1800,27 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \ int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \
int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));}) int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));})
#define vshlq(p0,p1) __arm_vshlq(p0,p1)
#define __arm_vshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
#define vshrq(p0,p1) __arm_vshrq(p0,p1)
#define __arm_vshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
int (*)[__ARM_mve_type_int16x8_t]: __arm_vshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
#define vcvtq_n(p0,p1) __arm_vcvtq_n(p0,p1) #define vcvtq_n(p0,p1) __arm_vcvtq_n(p0,p1)
#define __arm_vcvtq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \ #define __arm_vcvtq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
...@@ -1831,6 +1963,34 @@ extern void *__ARM_undef; ...@@ -1831,6 +1963,34 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));}) int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
#define vaddlvq_p(p0,p1) __arm_vaddlvq_p(p0,p1)
#define __arm_vaddlvq_p(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vaddlvq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vaddlvq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
#define vcmpneq(p0,p1) __arm_vcmpneq(p0,p1)
#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
#define vshlq(p0,p1) __arm_vshlq(p0,p1)
#define __arm_vshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
#endif /* MVE Floating point. */ #endif /* MVE Floating point. */
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
GCC is free software; you can redistribute it and/or modify it GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your by the Free Software Foundation; either version 3, or (at your
option) any later version. option) any later version.
GCC is distributed in the hope that it will be useful, but WITHOUT GCC is distributed in the hope that it will be useful, but WITHOUT
...@@ -87,3 +87,9 @@ VAR4 (BINOP_UNONE_UNONE_UNONE, vcreateq_u, v16qi, v8hi, v4si, v2di) ...@@ -87,3 +87,9 @@ VAR4 (BINOP_UNONE_UNONE_UNONE, vcreateq_u, v16qi, v8hi, v4si, v2di)
VAR4 (BINOP_NONE_UNONE_UNONE, vcreateq_s, v16qi, v8hi, v4si, v2di) VAR4 (BINOP_NONE_UNONE_UNONE, vcreateq_s, v16qi, v8hi, v4si, v2di)
VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si)
VAR1 (BINOP_NONE_NONE_UNONE, vaddlvq_p_s, v4si)
VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvq_p_u, v4si)
VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpneq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vshlq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_NONE, vshlq_u, v16qi, v8hi, v4si)
...@@ -40,7 +40,8 @@ ...@@ -40,7 +40,8 @@
VADDLVQ_U VCTP8Q VCTP16Q VCTP32Q VCTP64Q VPNOT VADDLVQ_U VCTP8Q VCTP16Q VCTP32Q VCTP64Q VPNOT
VCREATEQ_F VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U VBRSRQ_N_F VCREATEQ_F VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U VBRSRQ_N_F
VSUBQ_N_F VCREATEQ_U VCREATEQ_S VSHRQ_N_S VSHRQ_N_U VSUBQ_N_F VCREATEQ_U VCREATEQ_S VSHRQ_N_S VSHRQ_N_U
VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U]) VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U VADDLVQ_P_S
VADDLVQ_P_U VCMPNEQ_U VCMPNEQ_S VSHLQ_S VSHLQ_U])
(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF")
(V8HF "V8HI") (V4SF "V4SI")]) (V8HF "V8HI") (V4SF "V4SI")])
...@@ -59,8 +60,9 @@ ...@@ -59,8 +60,9 @@
(VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s")
(VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u")
(VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
(VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u")
(VCVTQ_N_FROM_F_U "u")]) (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s")
(VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")])
(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") (define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32")
(VCTP64Q "64")]) (VCTP64Q "64")])
...@@ -90,6 +92,9 @@ ...@@ -90,6 +92,9 @@
(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) (define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S])
(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) (define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U])
(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U]) (define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U])
(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U])
(define_int_iterator VCMPNEQ [VCMPNEQ_U VCMPNEQ_S])
(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U])
(define_insn "*mve_mov<mode>" (define_insn "*mve_mov<mode>"
[(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Us") [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Us")
...@@ -806,3 +811,48 @@ ...@@ -806,3 +811,48 @@
"vcvt.<supf><V_sz_elem>.f<V_sz_elem>\t%q0, %q1, %2" "vcvt.<supf><V_sz_elem>.f<V_sz_elem>\t%q0, %q1, %2"
[(set_attr "type" "mve_move") [(set_attr "type" "mve_move")
]) ])
;;
;; [vaddlvq_p_s])
;;
(define_insn "mve_vaddlvq_p_<supf>v4si"
[
(set (match_operand:DI 0 "s_register_operand" "=r")
(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
(match_operand:HI 2 "vpr_register_operand" "Up")]
VADDLVQ_P))
]
"TARGET_HAVE_MVE"
"vpst\;vaddlvt.<supf>32 %Q0, %R0, %q1"
[(set_attr "type" "mve_move")
(set_attr "length""8")])
;;
;; [vcmpneq_u, vcmpneq_s])
;;
(define_insn "mve_vcmpneq_<supf><mode>"
[
(set (match_operand:HI 0 "vpr_register_operand" "=Up")
(unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")]
VCMPNEQ))
]
"TARGET_HAVE_MVE"
"vcmp.i%#<V_sz_elem> ne, %q1, %q2"
[(set_attr "type" "mve_move")
])
;;
;; [vshlq_s, vshlq_u])
;;
(define_insn "mve_vshlq_<supf><mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")]
VSHLQ))
]
"TARGET_HAVE_MVE"
"vshl.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
[(set_attr "type" "mve_move")
])
...@@ -2,6 +2,25 @@ ...@@ -2,6 +2,25 @@
Mihail Ionescu <mihail.ionescu@arm.com> Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com> Srinath Parvathaneni <srinath.parvathaneni@arm.com>
* gcc.target/arm/mve/intrinsics/vaddlvq_p_s32.c: New test.
* gcc.target/arm/mve/intrinsics/vaddlvq_p_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmpneq_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmpneq_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmpneq_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmpneq_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmpneq_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmpneq_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vshlq_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vshlq_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vshlq_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vshlq_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vshlq_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vshlq_u8.c: Likewise.
2020-03-17 Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com>
* gcc.target/arm/mve/intrinsics/vcreateq_s16.c: New test. * gcc.target/arm/mve/intrinsics/vcreateq_s16.c: New test.
* gcc.target/arm/mve/intrinsics/vcreateq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcreateq_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcreateq_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vcreateq_s64.c: Likewise.
......
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int64_t
foo (int32x4_t a, mve_pred16_t p)
{
return vaddlvq_p_s32 (a, p);
}
/* { dg-final { scan-assembler "vaddlvt.s32" } } */
int64_t
foo1 (int32x4_t a, mve_pred16_t p)
{
return vaddlvq_p (a, p);
}
/* { dg-final { scan-assembler "vaddlvt.s32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint64_t
foo (uint32x4_t a, mve_pred16_t p)
{
return vaddlvq_p_u32 (a, p);
}
/* { dg-final { scan-assembler "vaddlvt.u32" } } */
uint64_t
foo1 (uint32x4_t a, mve_pred16_t p)
{
return vaddlvq_p (a, p);
}
/* { dg-final { scan-assembler "vaddlvt.u32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
mve_pred16_t
foo (int16x8_t a, int16x8_t b)
{
return vcmpneq_s16 (a, b);
}
/* { dg-final { scan-assembler "vcmp.i16" } } */
mve_pred16_t
foo1 (int16x8_t a, int16x8_t b)
{
return vcmpneq (a, b);
}
/* { dg-final { scan-assembler "vcmp.i16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
mve_pred16_t
foo (int32x4_t a, int32x4_t b)
{
return vcmpneq_s32 (a, b);
}
/* { dg-final { scan-assembler "vcmp.i32" } } */
mve_pred16_t
foo1 (int32x4_t a, int32x4_t b)
{
return vcmpneq (a, b);
}
/* { dg-final { scan-assembler "vcmp.i32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
mve_pred16_t
foo (int8x16_t a, int8x16_t b)
{
return vcmpneq_s8 (a, b);
}
/* { dg-final { scan-assembler "vcmp.i8" } } */
mve_pred16_t
foo1 (int8x16_t a, int8x16_t b)
{
return vcmpneq (a, b);
}
/* { dg-final { scan-assembler "vcmp.i8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
mve_pred16_t
foo (uint16x8_t a, uint16x8_t b)
{
return vcmpneq_u16 (a, b);
}
/* { dg-final { scan-assembler "vcmp.i16" } } */
mve_pred16_t
foo1 (uint16x8_t a, uint16x8_t b)
{
return vcmpneq (a, b);
}
/* { dg-final { scan-assembler "vcmp.i16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
mve_pred16_t
foo (uint32x4_t a, uint32x4_t b)
{
return vcmpneq_u32 (a, b);
}
/* { dg-final { scan-assembler "vcmp.i32" } } */
mve_pred16_t
foo1 (uint32x4_t a, uint32x4_t b)
{
return vcmpneq (a, b);
}
/* { dg-final { scan-assembler "vcmp.i32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
mve_pred16_t
foo (uint8x16_t a, uint8x16_t b)
{
return vcmpneq_u8 (a, b);
}
/* { dg-final { scan-assembler "vcmp.i8" } } */
mve_pred16_t
foo1 (uint8x16_t a, uint8x16_t b)
{
return vcmpneq (a, b);
}
/* { dg-final { scan-assembler "vcmp.i8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int16x8_t
foo (int16x8_t a, int16x8_t b)
{
return vshlq_s16 (a, b);
}
/* { dg-final { scan-assembler "vshl.s16" } } */
int16x8_t
foo1 (int16x8_t a, int16x8_t b)
{
return vshlq (a, b);
}
/* { dg-final { scan-assembler "vshl.s16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int32x4_t
foo (int32x4_t a, int32x4_t b)
{
return vshlq_s32 (a, b);
}
/* { dg-final { scan-assembler "vshl.s32" } } */
int32x4_t
foo1 (int32x4_t a, int32x4_t b)
{
return vshlq (a, b);
}
/* { dg-final { scan-assembler "vshl.s32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int8x16_t
foo (int8x16_t a, int8x16_t b)
{
return vshlq_s8 (a, b);
}
/* { dg-final { scan-assembler "vshl.s8" } } */
int8x16_t
foo1 (int8x16_t a, int8x16_t b)
{
return vshlq (a, b);
}
/* { dg-final { scan-assembler "vshl.s8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint16x8_t
foo (uint16x8_t a, int16x8_t b)
{
return vshlq_u16 (a, b);
}
/* { dg-final { scan-assembler "vshl.u16" } } */
uint16x8_t
foo1 (uint16x8_t a, int16x8_t b)
{
return vshlq (a, b);
}
/* { dg-final { scan-assembler "vshl.u16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint32x4_t
foo (uint32x4_t a, int32x4_t b)
{
return vshlq_u32 (a, b);
}
/* { dg-final { scan-assembler "vshl.u32" } } */
uint32x4_t
foo1 (uint32x4_t a, int32x4_t b)
{
return vshlq (a, b);
}
/* { dg-final { scan-assembler "vshl.u32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint8x16_t
foo (uint8x16_t a, int8x16_t b)
{
return vshlq_u8 (a, b);
}
/* { dg-final { scan-assembler "vshl.u8" } } */
uint8x16_t
foo1 (uint8x16_t a, int8x16_t b)
{
return vshlq (a, b);
}
/* { dg-final { scan-assembler "vshl.u8" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment