Commit dc8d7a0f by Tejas Belagod Committed by Tejas Belagod

arm_neon.h (__AARCH64_UINT64_C, [...]): New arm_neon.h's internal macros to…

arm_neon.h (__AARCH64_UINT64_C, [...]): New arm_neon.h's internal macros to specify 64-bit constants.

2013-08-30  Tejas Belagod  <tejas.belagod@arm.com>

gcc/
	* config/aarch64/arm_neon.h (__AARCH64_UINT64_C, __AARCH64_INT64_C): New
	arm_neon.h's internal macros to specify 64-bit constants. Avoid using
	stdint.h's macros.

From-SVN: r202108
parent ac7eacd2
2013-08-30 Tejas Belagod <tejas.belagod@arm.com>
* config/aarch64/arm_neon.h (__AARCH64_UINT64_C, __AARCH64_INT64_C): New
arm_neon.h's internal macros to specify 64-bit constants. Avoid using
stdint.h's macros.
2013-08-30 Joern Rennecke <joern.rennecke@embecosm.com>
* recog.c (verify_changes): Verify that changes[i].old is non-zero
......
...@@ -29,6 +29,9 @@ ...@@ -29,6 +29,9 @@
#include <stdint.h> #include <stdint.h>
#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)
typedef __builtin_aarch64_simd_qi int8x8_t typedef __builtin_aarch64_simd_qi int8x8_t
__attribute__ ((__vector_size__ (8))); __attribute__ ((__vector_size__ (8)));
typedef __builtin_aarch64_simd_hi int16x4_t typedef __builtin_aarch64_simd_hi int16x4_t
...@@ -9694,7 +9697,7 @@ vmovl_u32 (uint32x2_t a) ...@@ -9694,7 +9697,7 @@ vmovl_u32 (uint32x2_t a)
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovn_high_s16 (int8x8_t a, int16x8_t b) vmovn_high_s16 (int8x8_t a, int16x8_t b)
{ {
int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("xtn2 %0.16b,%1.8h" __asm__ ("xtn2 %0.16b,%1.8h"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -9705,7 +9708,7 @@ vmovn_high_s16 (int8x8_t a, int16x8_t b) ...@@ -9705,7 +9708,7 @@ vmovn_high_s16 (int8x8_t a, int16x8_t b)
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovn_high_s32 (int16x4_t a, int32x4_t b) vmovn_high_s32 (int16x4_t a, int32x4_t b)
{ {
int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("xtn2 %0.8h,%1.4s" __asm__ ("xtn2 %0.8h,%1.4s"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -9716,7 +9719,7 @@ vmovn_high_s32 (int16x4_t a, int32x4_t b) ...@@ -9716,7 +9719,7 @@ vmovn_high_s32 (int16x4_t a, int32x4_t b)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovn_high_s64 (int32x2_t a, int64x2_t b) vmovn_high_s64 (int32x2_t a, int64x2_t b)
{ {
int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("xtn2 %0.4s,%1.2d" __asm__ ("xtn2 %0.4s,%1.2d"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -9727,7 +9730,7 @@ vmovn_high_s64 (int32x2_t a, int64x2_t b) ...@@ -9727,7 +9730,7 @@ vmovn_high_s64 (int32x2_t a, int64x2_t b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovn_high_u16 (uint8x8_t a, uint16x8_t b) vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{ {
uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("xtn2 %0.16b,%1.8h" __asm__ ("xtn2 %0.16b,%1.8h"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -9738,7 +9741,7 @@ vmovn_high_u16 (uint8x8_t a, uint16x8_t b) ...@@ -9738,7 +9741,7 @@ vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b) vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{ {
uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("xtn2 %0.8h,%1.4s" __asm__ ("xtn2 %0.8h,%1.4s"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -9749,7 +9752,7 @@ vmovn_high_u32 (uint16x4_t a, uint32x4_t b) ...@@ -9749,7 +9752,7 @@ vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b) vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{ {
uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("xtn2 %0.4s,%1.2d" __asm__ ("xtn2 %0.4s,%1.2d"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12168,7 +12171,7 @@ vqdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12168,7 +12171,7 @@ vqdmulhq_n_s32 (int32x4_t a, int32_t b)
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b) vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{ {
int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("sqxtn2 %0.16b, %1.8h" __asm__ ("sqxtn2 %0.16b, %1.8h"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12179,7 +12182,7 @@ vqmovn_high_s16 (int8x8_t a, int16x8_t b) ...@@ -12179,7 +12182,7 @@ vqmovn_high_s16 (int8x8_t a, int16x8_t b)
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b) vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{ {
int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("sqxtn2 %0.8h, %1.4s" __asm__ ("sqxtn2 %0.8h, %1.4s"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12190,7 +12193,7 @@ vqmovn_high_s32 (int16x4_t a, int32x4_t b) ...@@ -12190,7 +12193,7 @@ vqmovn_high_s32 (int16x4_t a, int32x4_t b)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b) vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{ {
int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("sqxtn2 %0.4s, %1.2d" __asm__ ("sqxtn2 %0.4s, %1.2d"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12201,7 +12204,7 @@ vqmovn_high_s64 (int32x2_t a, int64x2_t b) ...@@ -12201,7 +12204,7 @@ vqmovn_high_s64 (int32x2_t a, int64x2_t b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{ {
uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("uqxtn2 %0.16b, %1.8h" __asm__ ("uqxtn2 %0.16b, %1.8h"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12212,7 +12215,7 @@ vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) ...@@ -12212,7 +12215,7 @@ vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{ {
uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("uqxtn2 %0.8h, %1.4s" __asm__ ("uqxtn2 %0.8h, %1.4s"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12223,7 +12226,7 @@ vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) ...@@ -12223,7 +12226,7 @@ vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{ {
uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("uqxtn2 %0.4s, %1.2d" __asm__ ("uqxtn2 %0.4s, %1.2d"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12234,7 +12237,7 @@ vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) ...@@ -12234,7 +12237,7 @@ vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b) vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{ {
uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("sqxtun2 %0.16b, %1.8h" __asm__ ("sqxtun2 %0.16b, %1.8h"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12245,7 +12248,7 @@ vqmovun_high_s16 (uint8x8_t a, int16x8_t b) ...@@ -12245,7 +12248,7 @@ vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b) vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{ {
uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("sqxtun2 %0.8h, %1.4s" __asm__ ("sqxtun2 %0.8h, %1.4s"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12256,7 +12259,7 @@ vqmovun_high_s32 (uint16x4_t a, int32x4_t b) ...@@ -12256,7 +12259,7 @@ vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b) vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{ {
uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("sqxtun2 %0.4s, %1.2d" __asm__ ("sqxtun2 %0.4s, %1.2d"
: "+w"(result) : "+w"(result)
: "w"(b) : "w"(b)
...@@ -12314,7 +12317,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12314,7 +12317,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int16x8_t b_ = (b); \ int16x8_t b_ = (b); \
int8x8_t a_ = (a); \ int8x8_t a_ = (a); \
int8x16_t result = vcombine_s8 \ int8x16_t result = vcombine_s8 \
(a_, vcreate_s8 (UINT64_C (0x0))); \ (a_, vcreate_s8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12328,7 +12332,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12328,7 +12332,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int32x4_t b_ = (b); \ int32x4_t b_ = (b); \
int16x4_t a_ = (a); \ int16x4_t a_ = (a); \
int16x8_t result = vcombine_s16 \ int16x8_t result = vcombine_s16 \
(a_, vcreate_s16 (UINT64_C (0x0))); \ (a_, vcreate_s16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12342,7 +12347,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12342,7 +12347,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int64x2_t b_ = (b); \ int64x2_t b_ = (b); \
int32x2_t a_ = (a); \ int32x2_t a_ = (a); \
int32x4_t result = vcombine_s32 \ int32x4_t result = vcombine_s32 \
(a_, vcreate_s32 (UINT64_C (0x0))); \ (a_, vcreate_s32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12356,7 +12362,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12356,7 +12362,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
uint16x8_t b_ = (b); \ uint16x8_t b_ = (b); \
uint8x8_t a_ = (a); \ uint8x8_t a_ = (a); \
uint8x16_t result = vcombine_u8 \ uint8x16_t result = vcombine_u8 \
(a_, vcreate_u8 (UINT64_C (0x0))); \ (a_, vcreate_u8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12370,7 +12377,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12370,7 +12377,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
uint32x4_t b_ = (b); \ uint32x4_t b_ = (b); \
uint16x4_t a_ = (a); \ uint16x4_t a_ = (a); \
uint16x8_t result = vcombine_u16 \ uint16x8_t result = vcombine_u16 \
(a_, vcreate_u16 (UINT64_C (0x0))); \ (a_, vcreate_u16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12384,7 +12392,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12384,7 +12392,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
uint64x2_t b_ = (b); \ uint64x2_t b_ = (b); \
uint32x2_t a_ = (a); \ uint32x2_t a_ = (a); \
uint32x4_t result = vcombine_u32 \ uint32x4_t result = vcombine_u32 \
(a_, vcreate_u32 (UINT64_C (0x0))); \ (a_, vcreate_u32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12398,7 +12407,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12398,7 +12407,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int16x8_t b_ = (b); \ int16x8_t b_ = (b); \
uint8x8_t a_ = (a); \ uint8x8_t a_ = (a); \
uint8x16_t result = vcombine_u8 \ uint8x16_t result = vcombine_u8 \
(a_, vcreate_u8 (UINT64_C (0x0))); \ (a_, vcreate_u8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12412,7 +12422,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12412,7 +12422,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int32x4_t b_ = (b); \ int32x4_t b_ = (b); \
uint16x4_t a_ = (a); \ uint16x4_t a_ = (a); \
uint16x8_t result = vcombine_u16 \ uint16x8_t result = vcombine_u16 \
(a_, vcreate_u16 (UINT64_C (0x0))); \ (a_, vcreate_u16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12426,7 +12437,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12426,7 +12437,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int64x2_t b_ = (b); \ int64x2_t b_ = (b); \
uint32x2_t a_ = (a); \ uint32x2_t a_ = (a); \
uint32x4_t result = vcombine_u32 \ uint32x4_t result = vcombine_u32 \
(a_, vcreate_u32 (UINT64_C (0x0))); \ (a_, vcreate_u32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12440,7 +12452,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12440,7 +12452,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int16x8_t b_ = (b); \ int16x8_t b_ = (b); \
int8x8_t a_ = (a); \ int8x8_t a_ = (a); \
int8x16_t result = vcombine_s8 \ int8x16_t result = vcombine_s8 \
(a_, vcreate_s8 (UINT64_C (0x0))); \ (a_, vcreate_s8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12454,7 +12467,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12454,7 +12467,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int32x4_t b_ = (b); \ int32x4_t b_ = (b); \
int16x4_t a_ = (a); \ int16x4_t a_ = (a); \
int16x8_t result = vcombine_s16 \ int16x8_t result = vcombine_s16 \
(a_, vcreate_s16 (UINT64_C (0x0))); \ (a_, vcreate_s16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12468,7 +12482,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12468,7 +12482,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int64x2_t b_ = (b); \ int64x2_t b_ = (b); \
int32x2_t a_ = (a); \ int32x2_t a_ = (a); \
int32x4_t result = vcombine_s32 \ int32x4_t result = vcombine_s32 \
(a_, vcreate_s32 (UINT64_C (0x0))); \ (a_, vcreate_s32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12482,7 +12497,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12482,7 +12497,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
uint16x8_t b_ = (b); \ uint16x8_t b_ = (b); \
uint8x8_t a_ = (a); \ uint8x8_t a_ = (a); \
uint8x16_t result = vcombine_u8 \ uint8x16_t result = vcombine_u8 \
(a_, vcreate_u8 (UINT64_C (0x0))); \ (a_, vcreate_u8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12496,7 +12512,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12496,7 +12512,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
uint32x4_t b_ = (b); \ uint32x4_t b_ = (b); \
uint16x4_t a_ = (a); \ uint16x4_t a_ = (a); \
uint16x8_t result = vcombine_u16 \ uint16x8_t result = vcombine_u16 \
(a_, vcreate_u16 (UINT64_C (0x0))); \ (a_, vcreate_u16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12510,7 +12527,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12510,7 +12527,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
uint64x2_t b_ = (b); \ uint64x2_t b_ = (b); \
uint32x2_t a_ = (a); \ uint32x2_t a_ = (a); \
uint32x4_t result = vcombine_u32 \ uint32x4_t result = vcombine_u32 \
(a_, vcreate_u32 (UINT64_C (0x0))); \ (a_, vcreate_u32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12524,7 +12542,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12524,7 +12542,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int16x8_t b_ = (b); \ int16x8_t b_ = (b); \
uint8x8_t a_ = (a); \ uint8x8_t a_ = (a); \
uint8x16_t result = vcombine_u8 \ uint8x16_t result = vcombine_u8 \
(a_, vcreate_u8 (UINT64_C (0x0))); \ (a_, vcreate_u8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12538,7 +12557,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12538,7 +12557,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int32x4_t b_ = (b); \ int32x4_t b_ = (b); \
uint16x4_t a_ = (a); \ uint16x4_t a_ = (a); \
uint16x8_t result = vcombine_u16 \ uint16x8_t result = vcombine_u16 \
(a_, vcreate_u16 (UINT64_C (0x0))); \ (a_, vcreate_u16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -12552,7 +12572,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ...@@ -12552,7 +12572,8 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
int64x2_t b_ = (b); \ int64x2_t b_ = (b); \
uint32x2_t a_ = (a); \ uint32x2_t a_ = (a); \
uint32x4_t result = vcombine_u32 \ uint32x4_t result = vcombine_u32 \
(a_, vcreate_u32 (UINT64_C (0x0))); \ (a_, vcreate_u32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13028,7 +13049,8 @@ vrev64q_u32 (uint32x4_t a) ...@@ -13028,7 +13049,8 @@ vrev64q_u32 (uint32x4_t a)
int16x8_t b_ = (b); \ int16x8_t b_ = (b); \
int8x8_t a_ = (a); \ int8x8_t a_ = (a); \
int8x16_t result = vcombine_s8 \ int8x16_t result = vcombine_s8 \
(a_, vcreate_s8 (UINT64_C (0x0))); \ (a_, vcreate_s8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13042,7 +13064,8 @@ vrev64q_u32 (uint32x4_t a) ...@@ -13042,7 +13064,8 @@ vrev64q_u32 (uint32x4_t a)
int32x4_t b_ = (b); \ int32x4_t b_ = (b); \
int16x4_t a_ = (a); \ int16x4_t a_ = (a); \
int16x8_t result = vcombine_s16 \ int16x8_t result = vcombine_s16 \
(a_, vcreate_s16 (UINT64_C (0x0))); \ (a_, vcreate_s16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13056,7 +13079,8 @@ vrev64q_u32 (uint32x4_t a) ...@@ -13056,7 +13079,8 @@ vrev64q_u32 (uint32x4_t a)
int64x2_t b_ = (b); \ int64x2_t b_ = (b); \
int32x2_t a_ = (a); \ int32x2_t a_ = (a); \
int32x4_t result = vcombine_s32 \ int32x4_t result = vcombine_s32 \
(a_, vcreate_s32 (UINT64_C (0x0))); \ (a_, vcreate_s32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13070,7 +13094,8 @@ vrev64q_u32 (uint32x4_t a) ...@@ -13070,7 +13094,8 @@ vrev64q_u32 (uint32x4_t a)
uint16x8_t b_ = (b); \ uint16x8_t b_ = (b); \
uint8x8_t a_ = (a); \ uint8x8_t a_ = (a); \
uint8x16_t result = vcombine_u8 \ uint8x16_t result = vcombine_u8 \
(a_, vcreate_u8 (UINT64_C (0x0))); \ (a_, vcreate_u8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13084,7 +13109,8 @@ vrev64q_u32 (uint32x4_t a) ...@@ -13084,7 +13109,8 @@ vrev64q_u32 (uint32x4_t a)
uint32x4_t b_ = (b); \ uint32x4_t b_ = (b); \
uint16x4_t a_ = (a); \ uint16x4_t a_ = (a); \
uint16x8_t result = vcombine_u16 \ uint16x8_t result = vcombine_u16 \
(a_, vcreate_u16 (UINT64_C (0x0))); \ (a_, vcreate_u16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13098,7 +13124,8 @@ vrev64q_u32 (uint32x4_t a) ...@@ -13098,7 +13124,8 @@ vrev64q_u32 (uint32x4_t a)
uint64x2_t b_ = (b); \ uint64x2_t b_ = (b); \
uint32x2_t a_ = (a); \ uint32x2_t a_ = (a); \
uint32x4_t result = vcombine_u32 \ uint32x4_t result = vcombine_u32 \
(a_, vcreate_u32 (UINT64_C (0x0))); \ (a_, vcreate_u32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13335,7 +13362,7 @@ vrsrtsq_f64 (float64x2_t a, float64x2_t b) ...@@ -13335,7 +13362,7 @@ vrsrtsq_f64 (float64x2_t a, float64x2_t b)
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{ {
int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -13346,7 +13373,7 @@ vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) ...@@ -13346,7 +13373,7 @@ vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
{ {
int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -13357,7 +13384,7 @@ vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) ...@@ -13357,7 +13384,7 @@ vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
{ {
int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -13368,7 +13395,7 @@ vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) ...@@ -13368,7 +13395,7 @@ vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
{ {
uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -13379,7 +13406,7 @@ vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) ...@@ -13379,7 +13406,7 @@ vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
{ {
uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -13390,7 +13417,7 @@ vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) ...@@ -13390,7 +13417,7 @@ vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
{ {
uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -13782,7 +13809,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) ...@@ -13782,7 +13809,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
int16x8_t b_ = (b); \ int16x8_t b_ = (b); \
int8x8_t a_ = (a); \ int8x8_t a_ = (a); \
int8x16_t result = vcombine_s8 \ int8x16_t result = vcombine_s8 \
(a_, vcreate_s8 (UINT64_C (0x0))); \ (a_, vcreate_s8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("shrn2 %0.16b,%1.8h,#%2" \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13796,7 +13824,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) ...@@ -13796,7 +13824,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
int32x4_t b_ = (b); \ int32x4_t b_ = (b); \
int16x4_t a_ = (a); \ int16x4_t a_ = (a); \
int16x8_t result = vcombine_s16 \ int16x8_t result = vcombine_s16 \
(a_, vcreate_s16 (UINT64_C (0x0))); \ (a_, vcreate_s16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("shrn2 %0.8h,%1.4s,#%2" \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13810,7 +13839,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) ...@@ -13810,7 +13839,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
int64x2_t b_ = (b); \ int64x2_t b_ = (b); \
int32x2_t a_ = (a); \ int32x2_t a_ = (a); \
int32x4_t result = vcombine_s32 \ int32x4_t result = vcombine_s32 \
(a_, vcreate_s32 (UINT64_C (0x0))); \ (a_, vcreate_s32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("shrn2 %0.4s,%1.2d,#%2" \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13824,7 +13854,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) ...@@ -13824,7 +13854,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
uint16x8_t b_ = (b); \ uint16x8_t b_ = (b); \
uint8x8_t a_ = (a); \ uint8x8_t a_ = (a); \
uint8x16_t result = vcombine_u8 \ uint8x16_t result = vcombine_u8 \
(a_, vcreate_u8 (UINT64_C (0x0))); \ (a_, vcreate_u8 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("shrn2 %0.16b,%1.8h,#%2" \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13838,7 +13869,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) ...@@ -13838,7 +13869,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
uint32x4_t b_ = (b); \ uint32x4_t b_ = (b); \
uint16x4_t a_ = (a); \ uint16x4_t a_ = (a); \
uint16x8_t result = vcombine_u16 \ uint16x8_t result = vcombine_u16 \
(a_, vcreate_u16 (UINT64_C (0x0))); \ (a_, vcreate_u16 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("shrn2 %0.8h,%1.4s,#%2" \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -13852,7 +13884,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) ...@@ -13852,7 +13884,8 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
uint64x2_t b_ = (b); \ uint64x2_t b_ = (b); \
uint32x2_t a_ = (a); \ uint32x2_t a_ = (a); \
uint32x4_t result = vcombine_u32 \ uint32x4_t result = vcombine_u32 \
(a_, vcreate_u32 (UINT64_C (0x0))); \ (a_, vcreate_u32 \
(__AARCH64_UINT64_C (0x0))); \
__asm__ ("shrn2 %0.4s,%1.2d,#%2" \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
: "+w"(result) \ : "+w"(result) \
: "w"(b_), "i"(c) \ : "w"(b_), "i"(c) \
...@@ -14304,7 +14337,7 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) ...@@ -14304,7 +14337,7 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{ {
int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("subhn2 %0.16b, %1.8h, %2.8h" __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -14315,7 +14348,7 @@ vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) ...@@ -14315,7 +14348,7 @@ vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
{ {
int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("subhn2 %0.8h, %1.4s, %2.4s" __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -14326,7 +14359,7 @@ vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) ...@@ -14326,7 +14359,7 @@ vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
{ {
int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("subhn2 %0.4s, %1.2d, %2.2d" __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -14337,7 +14370,7 @@ vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) ...@@ -14337,7 +14370,7 @@ vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
{ {
uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("subhn2 %0.16b, %1.8h, %2.8h" __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -14348,7 +14381,7 @@ vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) ...@@ -14348,7 +14381,7 @@ vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
{ {
uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
__asm__ ("subhn2 %0.8h, %1.4s, %2.4s" __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -14359,7 +14392,7 @@ vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) ...@@ -14359,7 +14392,7 @@ vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
{ {
uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
__asm__ ("subhn2 %0.4s, %1.2d, %2.2d" __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
: "+w"(result) : "+w"(result)
: "w"(b), "w"(c) : "w"(b), "w"(c)
...@@ -16957,7 +16990,7 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ...@@ -16957,7 +16990,7 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx) vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{ {
int8x8_t result; int8x8_t result;
int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("tbl %0.8b, {%1.16b}, %2.8b" __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
: "=w"(result) : "=w"(result)
: "w"(temp), "w"(idx) : "w"(temp), "w"(idx)
...@@ -16969,7 +17002,7 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ...@@ -16969,7 +17002,7 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{ {
uint8x8_t result; uint8x8_t result;
uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("tbl %0.8b, {%1.16b}, %2.8b" __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
: "=w"(result) : "=w"(result)
: "w"(temp), "w"(idx) : "w"(temp), "w"(idx)
...@@ -16981,7 +17014,7 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ...@@ -16981,7 +17014,7 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{ {
poly8x8_t result; poly8x8_t result;
poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("tbl %0.8b, {%1.16b}, %2.8b" __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
: "=w"(result) : "=w"(result)
: "w"(temp), "w"(idx) : "w"(temp), "w"(idx)
...@@ -17031,7 +17064,7 @@ vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) ...@@ -17031,7 +17064,7 @@ vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
int8x8_t result; int8x8_t result;
int8x16x2_t temp; int8x16x2_t temp;
temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
"tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
: "=w"(result) : "=w"(result)
...@@ -17046,7 +17079,7 @@ vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) ...@@ -17046,7 +17079,7 @@ vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
uint8x8_t result; uint8x8_t result;
uint8x16x2_t temp; uint8x16x2_t temp;
temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
"tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
: "=w"(result) : "=w"(result)
...@@ -17061,7 +17094,7 @@ vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) ...@@ -17061,7 +17094,7 @@ vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
poly8x8_t result; poly8x8_t result;
poly8x16x2_t temp; poly8x16x2_t temp;
temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
"tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
: "=w"(result) : "=w"(result)
...@@ -17120,7 +17153,7 @@ vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx) ...@@ -17120,7 +17153,7 @@ vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
{ {
int8x8_t result; int8x8_t result;
int8x8_t tmp1; int8x8_t tmp1;
int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("movi %0.8b, 8\n\t" __asm__ ("movi %0.8b, 8\n\t"
"cmhs %0.8b, %3.8b, %0.8b\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t"
...@@ -17136,7 +17169,7 @@ vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx) ...@@ -17136,7 +17169,7 @@ vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
{ {
uint8x8_t result; uint8x8_t result;
uint8x8_t tmp1; uint8x8_t tmp1;
uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("movi %0.8b, 8\n\t" __asm__ ("movi %0.8b, 8\n\t"
"cmhs %0.8b, %3.8b, %0.8b\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t"
...@@ -17152,7 +17185,7 @@ vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx) ...@@ -17152,7 +17185,7 @@ vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
{ {
poly8x8_t result; poly8x8_t result;
poly8x8_t tmp1; poly8x8_t tmp1;
poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("movi %0.8b, 8\n\t" __asm__ ("movi %0.8b, 8\n\t"
"cmhs %0.8b, %3.8b, %0.8b\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t"
...@@ -17206,7 +17239,7 @@ vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx) ...@@ -17206,7 +17239,7 @@ vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
int8x8_t tmp1; int8x8_t tmp1;
int8x16x2_t temp; int8x16x2_t temp;
temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
"movi %0.8b, 24\n\t" "movi %0.8b, 24\n\t"
"cmhs %0.8b, %3.8b, %0.8b\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t"
...@@ -17225,7 +17258,7 @@ vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx) ...@@ -17225,7 +17258,7 @@ vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
uint8x8_t tmp1; uint8x8_t tmp1;
uint8x16x2_t temp; uint8x16x2_t temp;
temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
"movi %0.8b, 24\n\t" "movi %0.8b, 24\n\t"
"cmhs %0.8b, %3.8b, %0.8b\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t"
...@@ -17244,7 +17277,7 @@ vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx) ...@@ -17244,7 +17277,7 @@ vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
poly8x8_t tmp1; poly8x8_t tmp1;
poly8x16x2_t temp; poly8x16x2_t temp;
temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
__asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
"movi %0.8b, 24\n\t" "movi %0.8b, 24\n\t"
"cmhs %0.8b, %3.8b, %0.8b\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t"
...@@ -21420,7 +21453,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) ...@@ -21420,7 +21453,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{ {
int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
} }
...@@ -21471,7 +21504,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) ...@@ -21471,7 +21504,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{ {
int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
} }
...@@ -21548,7 +21581,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) ...@@ -21548,7 +21581,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{ {
int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
} }
...@@ -21599,7 +21632,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) ...@@ -21599,7 +21632,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{ {
int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
} }
...@@ -21724,7 +21757,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) ...@@ -21724,7 +21757,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{ {
int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0))); int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
} }
...@@ -21773,7 +21806,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) ...@@ -21773,7 +21806,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{ {
int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0))); int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
} }
...@@ -24360,8 +24393,8 @@ vst2_s64 (int64_t * __a, int64x1x2_t val) ...@@ -24360,8 +24393,8 @@ vst2_s64 (int64_t * __a, int64x1x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
int64x2x2_t temp; int64x2x2_t temp;
temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
__builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
...@@ -24372,8 +24405,8 @@ vst2_u64 (uint64_t * __a, uint64x1x2_t val) ...@@ -24372,8 +24405,8 @@ vst2_u64 (uint64_t * __a, uint64x1x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
uint64x2x2_t temp; uint64x2x2_t temp;
temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
__builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
...@@ -24384,8 +24417,8 @@ vst2_f64 (float64_t * __a, float64x1x2_t val) ...@@ -24384,8 +24417,8 @@ vst2_f64 (float64_t * __a, float64x1x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
float64x2x2_t temp; float64x2x2_t temp;
temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
__builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
...@@ -24396,8 +24429,8 @@ vst2_s8 (int8_t * __a, int8x8x2_t val) ...@@ -24396,8 +24429,8 @@ vst2_s8 (int8_t * __a, int8x8x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
int8x16x2_t temp; int8x16x2_t temp;
temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
__builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
...@@ -24408,8 +24441,8 @@ vst2_p8 (poly8_t * __a, poly8x8x2_t val) ...@@ -24408,8 +24441,8 @@ vst2_p8 (poly8_t * __a, poly8x8x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
poly8x16x2_t temp; poly8x16x2_t temp;
temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
__builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
...@@ -24420,8 +24453,8 @@ vst2_s16 (int16_t * __a, int16x4x2_t val) ...@@ -24420,8 +24453,8 @@ vst2_s16 (int16_t * __a, int16x4x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
int16x8x2_t temp; int16x8x2_t temp;
temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
__builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
...@@ -24432,8 +24465,8 @@ vst2_p16 (poly16_t * __a, poly16x4x2_t val) ...@@ -24432,8 +24465,8 @@ vst2_p16 (poly16_t * __a, poly16x4x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
poly16x8x2_t temp; poly16x8x2_t temp;
temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
__builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
...@@ -24444,8 +24477,8 @@ vst2_s32 (int32_t * __a, int32x2x2_t val) ...@@ -24444,8 +24477,8 @@ vst2_s32 (int32_t * __a, int32x2x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
int32x4x2_t temp; int32x4x2_t temp;
temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
__builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
...@@ -24456,8 +24489,8 @@ vst2_u8 (uint8_t * __a, uint8x8x2_t val) ...@@ -24456,8 +24489,8 @@ vst2_u8 (uint8_t * __a, uint8x8x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
uint8x16x2_t temp; uint8x16x2_t temp;
temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
__builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
...@@ -24468,8 +24501,8 @@ vst2_u16 (uint16_t * __a, uint16x4x2_t val) ...@@ -24468,8 +24501,8 @@ vst2_u16 (uint16_t * __a, uint16x4x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
uint16x8x2_t temp; uint16x8x2_t temp;
temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
__builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
...@@ -24480,8 +24513,8 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t val) ...@@ -24480,8 +24513,8 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
uint32x4x2_t temp; uint32x4x2_t temp;
temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
__builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
...@@ -24492,8 +24525,8 @@ vst2_f32 (float32_t * __a, float32x2x2_t val) ...@@ -24492,8 +24525,8 @@ vst2_f32 (float32_t * __a, float32x2x2_t val)
{ {
__builtin_aarch64_simd_oi __o; __builtin_aarch64_simd_oi __o;
float32x4x2_t temp; float32x4x2_t temp;
temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
__builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
...@@ -24612,9 +24645,9 @@ vst3_s64 (int64_t * __a, int64x1x3_t val) ...@@ -24612,9 +24645,9 @@ vst3_s64 (int64_t * __a, int64x1x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
int64x2x3_t temp; int64x2x3_t temp;
temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
...@@ -24626,9 +24659,9 @@ vst3_u64 (uint64_t * __a, uint64x1x3_t val) ...@@ -24626,9 +24659,9 @@ vst3_u64 (uint64_t * __a, uint64x1x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
uint64x2x3_t temp; uint64x2x3_t temp;
temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
...@@ -24640,9 +24673,9 @@ vst3_f64 (float64_t * __a, float64x1x3_t val) ...@@ -24640,9 +24673,9 @@ vst3_f64 (float64_t * __a, float64x1x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
float64x2x3_t temp; float64x2x3_t temp;
temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
...@@ -24654,9 +24687,9 @@ vst3_s8 (int8_t * __a, int8x8x3_t val) ...@@ -24654,9 +24687,9 @@ vst3_s8 (int8_t * __a, int8x8x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
int8x16x3_t temp; int8x16x3_t temp;
temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
...@@ -24668,9 +24701,9 @@ vst3_p8 (poly8_t * __a, poly8x8x3_t val) ...@@ -24668,9 +24701,9 @@ vst3_p8 (poly8_t * __a, poly8x8x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
poly8x16x3_t temp; poly8x16x3_t temp;
temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
...@@ -24682,9 +24715,9 @@ vst3_s16 (int16_t * __a, int16x4x3_t val) ...@@ -24682,9 +24715,9 @@ vst3_s16 (int16_t * __a, int16x4x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
int16x8x3_t temp; int16x8x3_t temp;
temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
...@@ -24696,9 +24729,9 @@ vst3_p16 (poly16_t * __a, poly16x4x3_t val) ...@@ -24696,9 +24729,9 @@ vst3_p16 (poly16_t * __a, poly16x4x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
poly16x8x3_t temp; poly16x8x3_t temp;
temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
...@@ -24710,9 +24743,9 @@ vst3_s32 (int32_t * __a, int32x2x3_t val) ...@@ -24710,9 +24743,9 @@ vst3_s32 (int32_t * __a, int32x2x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
int32x4x3_t temp; int32x4x3_t temp;
temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
...@@ -24724,9 +24757,9 @@ vst3_u8 (uint8_t * __a, uint8x8x3_t val) ...@@ -24724,9 +24757,9 @@ vst3_u8 (uint8_t * __a, uint8x8x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
uint8x16x3_t temp; uint8x16x3_t temp;
temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
...@@ -24738,9 +24771,9 @@ vst3_u16 (uint16_t * __a, uint16x4x3_t val) ...@@ -24738,9 +24771,9 @@ vst3_u16 (uint16_t * __a, uint16x4x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
uint16x8x3_t temp; uint16x8x3_t temp;
temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
...@@ -24752,9 +24785,9 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t val) ...@@ -24752,9 +24785,9 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
uint32x4x3_t temp; uint32x4x3_t temp;
temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
...@@ -24766,9 +24799,9 @@ vst3_f32 (float32_t * __a, float32x2x3_t val) ...@@ -24766,9 +24799,9 @@ vst3_f32 (float32_t * __a, float32x2x3_t val)
{ {
__builtin_aarch64_simd_ci __o; __builtin_aarch64_simd_ci __o;
float32x4x3_t temp; float32x4x3_t temp;
temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
...@@ -24900,10 +24933,10 @@ vst4_s64 (int64_t * __a, int64x1x4_t val) ...@@ -24900,10 +24933,10 @@ vst4_s64 (int64_t * __a, int64x1x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
int64x2x4_t temp; int64x2x4_t temp;
temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0))); temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
...@@ -24916,10 +24949,10 @@ vst4_u64 (uint64_t * __a, uint64x1x4_t val) ...@@ -24916,10 +24949,10 @@ vst4_u64 (uint64_t * __a, uint64x1x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
uint64x2x4_t temp; uint64x2x4_t temp;
temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0))); temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
...@@ -24932,10 +24965,10 @@ vst4_f64 (float64_t * __a, float64x1x4_t val) ...@@ -24932,10 +24965,10 @@ vst4_f64 (float64_t * __a, float64x1x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
float64x2x4_t temp; float64x2x4_t temp;
temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (UINT64_C (0))); temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
...@@ -24948,10 +24981,10 @@ vst4_s8 (int8_t * __a, int8x8x4_t val) ...@@ -24948,10 +24981,10 @@ vst4_s8 (int8_t * __a, int8x8x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
int8x16x4_t temp; int8x16x4_t temp;
temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0))); temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
...@@ -24964,10 +24997,10 @@ vst4_p8 (poly8_t * __a, poly8x8x4_t val) ...@@ -24964,10 +24997,10 @@ vst4_p8 (poly8_t * __a, poly8x8x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
poly8x16x4_t temp; poly8x16x4_t temp;
temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0))); temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
...@@ -24980,10 +25013,10 @@ vst4_s16 (int16_t * __a, int16x4x4_t val) ...@@ -24980,10 +25013,10 @@ vst4_s16 (int16_t * __a, int16x4x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
int16x8x4_t temp; int16x8x4_t temp;
temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0))); temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
...@@ -24996,10 +25029,10 @@ vst4_p16 (poly16_t * __a, poly16x4x4_t val) ...@@ -24996,10 +25029,10 @@ vst4_p16 (poly16_t * __a, poly16x4x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
poly16x8x4_t temp; poly16x8x4_t temp;
temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (UINT64_C (0))); temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
...@@ -25012,10 +25045,10 @@ vst4_s32 (int32_t * __a, int32x2x4_t val) ...@@ -25012,10 +25045,10 @@ vst4_s32 (int32_t * __a, int32x2x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
int32x4x4_t temp; int32x4x4_t temp;
temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0))); temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
__o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
...@@ -25028,10 +25061,10 @@ vst4_u8 (uint8_t * __a, uint8x8x4_t val) ...@@ -25028,10 +25061,10 @@ vst4_u8 (uint8_t * __a, uint8x8x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
uint8x16x4_t temp; uint8x16x4_t temp;
temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0))); temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
...@@ -25044,10 +25077,10 @@ vst4_u16 (uint16_t * __a, uint16x4x4_t val) ...@@ -25044,10 +25077,10 @@ vst4_u16 (uint16_t * __a, uint16x4x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
uint16x8x4_t temp; uint16x8x4_t temp;
temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0))); temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
...@@ -25060,10 +25093,10 @@ vst4_u32 (uint32_t * __a, uint32x2x4_t val) ...@@ -25060,10 +25093,10 @@ vst4_u32 (uint32_t * __a, uint32x2x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
uint32x4x4_t temp; uint32x4x4_t temp;
temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0))); temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
...@@ -25076,10 +25109,10 @@ vst4_f32 (float32_t * __a, float32x2x4_t val) ...@@ -25076,10 +25109,10 @@ vst4_f32 (float32_t * __a, float32x2x4_t val)
{ {
__builtin_aarch64_simd_xi __o; __builtin_aarch64_simd_xi __o;
float32x4x4_t temp; float32x4x4_t temp;
temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0))); temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
__o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
__o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
__o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment