Commit 7211512a by Alan Lawrence, committed by Alan Lawrence

Rewrite AArch64 UZP Intrinsics using __builtin_shuffle.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2.
	* gcc.target/aarch64/vuzpu32_1.c: Likewise.
	* gcc.target/aarch64/vuzpf32_1.c: Likewise.

gcc/ChangeLog:

	* config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8,
	vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32,
	vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32,
	vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32,
	vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8,
	vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16,
	vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16,
	vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle.

From-SVN: r209943
parent e3fe9b5b
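
Background for the change below: __builtin_shuffle is a GCC extension that builds a vector whose lane i is element mask[i] of the two input vectors laid end to end, and the AArch64 backend matches constant masks to single permute instructions such as uzp1/uzp2. A minimal sketch of the pattern used throughout this patch (my_uzp1_s32 is a hypothetical wrapper, not part of arm_neon.h):

#include <arm_neon.h>

int32x2_t
my_uzp1_s32 (int32x2_t a, int32x2_t b)
{
  /* Even-numbered lanes of {a[0], a[1], b[0], b[1]}.  On big-endian
     (__AARCH64EB__) GCC numbers vector lanes in the opposite order,
     which is why the header supplies a second mask under #ifdef.  */
  return __builtin_shuffle (a, b, (uint32x2_t) {0, 2});
}

Expressing the permutes as generic shuffles lets the mid-end see through them, which is also why some of the tests below now match different instructions.
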
2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8,
vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32,
vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32,
vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32,
vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8,
vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16,
vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16,
vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle.
2014-04-30 Joern Rennecke <joern.rennecke@embecosm.com>
* config/arc/arc.opt (mlra): Move comment above option name
@@ -13199,467 +13199,6 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t a, float32x2_t b)
{
float32x2_t result;
__asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t a, poly8x8_t b)
{
poly8x8_t result;
__asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t a, poly16x4_t b)
{
poly16x4_t result;
__asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t a, int8x8_t b)
{
int8x8_t result;
__asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t a, int16x4_t b)
{
int16x4_t result;
__asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t a, int32x2_t b)
{
int32x2_t result;
__asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t a, uint8x8_t b)
{
uint8x8_t result;
__asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t a, uint16x4_t b)
{
uint16x4_t result;
__asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32 (uint32x2_t a, uint32x2_t b)
{
uint32x2_t result;
__asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32 (float32x4_t a, float32x4_t b)
{
float32x4_t result;
__asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64 (float64x2_t a, float64x2_t b)
{
float64x2_t result;
__asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
{
poly8x16_t result;
__asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
{
poly16x8_t result;
__asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8 (int8x16_t a, int8x16_t b)
{
int8x16_t result;
__asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16 (int16x8_t a, int16x8_t b)
{
int16x8_t result;
__asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32 (int32x4_t a, int32x4_t b)
{
int32x4_t result;
__asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64 (int64x2_t a, int64x2_t b)
{
int64x2_t result;
__asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
{
uint8x16_t result;
__asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
{
uint16x8_t result;
__asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
{
uint32x4_t result;
__asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
{
uint64x2_t result;
__asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t a, float32x2_t b)
{
float32x2_t result;
__asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t a, poly8x8_t b)
{
poly8x8_t result;
__asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t a, poly16x4_t b)
{
poly16x4_t result;
__asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t a, int8x8_t b)
{
int8x8_t result;
__asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t a, int16x4_t b)
{
int16x4_t result;
__asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t a, int32x2_t b)
{
int32x2_t result;
__asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t a, uint8x8_t b)
{
uint8x8_t result;
__asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16 (uint16x4_t a, uint16x4_t b)
{
uint16x4_t result;
__asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32 (uint32x2_t a, uint32x2_t b)
{
uint32x2_t result;
__asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32 (float32x4_t a, float32x4_t b)
{
float32x4_t result;
__asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64 (float64x2_t a, float64x2_t b)
{
float64x2_t result;
__asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
{
poly8x16_t result;
__asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
{
poly16x8_t result;
__asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8 (int8x16_t a, int8x16_t b)
{
int8x16_t result;
__asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16 (int16x8_t a, int16x8_t b)
{
int16x8_t result;
__asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp2q_s32 (int32x4_t a, int32x4_t b)
{
int32x4_t result;
__asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp2q_s64 (int64x2_t a, int64x2_t b)
{
int64x2_t result;
__asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
{
uint8x16_t result;
__asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
{
uint16x8_t result;
__asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
{
uint32x4_t result;
__asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
{
uint64x2_t result;
__asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
/* End of temporary inline asm implementations. */
@@ -24844,407 +24383,839 @@ vst4q_f64 (float64_t * __a, float64x2x4_t val)
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsubd_s64 (int64x1_t __a, int64x1_t __b)
{
return __a - __b;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
{
return __a - __b;
}
/* vtbx1 */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
{
uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
vmov_n_u8 (8));
int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
return vbsl_s8 (__mask, __tbl, __r);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
{
uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
return vbsl_u8 (__mask, __tbl, __r);
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
{
uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
return vbsl_p8 (__mask, __tbl, __r);
}
/* vtbx3 */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
{
uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
vmov_n_u8 (24));
int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
return vbsl_s8 (__mask, __tbl, __r);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
{
uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
return vbsl_u8 (__mask, __tbl, __r);
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
{
uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
return vbsl_p8 (__mask, __tbl, __r);
}
/* vtrn */
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t a, float32x2_t b)
{
return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
}
__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8 (poly8x8_t a, poly8x8_t b)
{
return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
}
__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16 (poly16x4_t a, poly16x4_t b)
{
return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
}
__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8 (int8x8_t a, int8x8_t b)
{
return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
}
__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16 (int16x4_t a, int16x4_t b)
{
return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
}
__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32 (int32x2_t a, int32x2_t b)
{
return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
}
__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8 (uint8x8_t a, uint8x8_t b)
{
return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
}
__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16 (uint16x4_t a, uint16x4_t b)
{
return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
}
__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32 (uint32x2_t a, uint32x2_t b)
{
return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
}
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32 (float32x4_t a, float32x4_t b)
{
return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
}
__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8 (poly8x16_t a, poly8x16_t b)
{
return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
}
__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16 (poly16x8_t a, poly16x8_t b)
{
return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
}
__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8 (int8x16_t a, int8x16_t b)
{
return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
}
__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16 (int16x8_t a, int16x8_t b)
{
return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
}
__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32 (int32x4_t a, int32x4_t b)
{
return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
}
__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8 (uint8x16_t a, uint8x16_t b)
{
return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
}
__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16 (uint16x8_t a, uint16x8_t b)
{
return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
}
__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32 (uint32x4_t a, uint32x4_t b)
{
return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
}
/* vtst */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
return (__a & __b) ? -1ll : 0ll;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
(int8x8_t) __b);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
(int16x4_t) __b);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
(int32x2_t) __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
return (__a & __b) ? -1ll : 0ll;
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
(int8x16_t) __b);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
(int16x8_t) __b);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
(int32x4_t) __b);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
(int64x2_t) __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_s64 (int64x1_t __a, int64x1_t __b)
{
return (__a & __b) ? -1ll : 0ll;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
{
return (__a & __b) ? -1ll : 0ll;
}
/* vuqadd */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
}
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
{
return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
}
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
{
return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
{
return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
{
return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
__extension__ static __inline rettype \
__attribute__ ((__always_inline__)) \
v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
{ \
return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
}
#define __INTERLEAVE_LIST(op) \
__DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
__DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
__DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
__DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
__DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
__DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
__DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
__DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
__DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
__DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
__DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
__DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
__DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
__DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
__DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
__DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
__DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
__DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
/* vuzp */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x16_t)
{17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x16_t)
{17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x16_t)
{17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x16_t)
{16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x16_t)
{16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint8x16_t)
{16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
return __builtin_shuffle (__a, __b, (uint8x16_t)
{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}
__INTERLEAVE_LIST (uzp)
/* vzip */
2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
* gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2.
* gcc.target/aarch64/vuzpu32_1.c: Likewise.
* gcc.target/aarch64/vuzpf32_1.c: Likewise.
2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
* gcc.target/aarch64/simd/vuzpf32_1.c: New file.
* gcc.target/aarch64/simd/vuzpf32.x: New file.
* gcc.target/aarch64/simd/vuzpp16_1.c: New file.
@@ -6,6 +6,6 @@
#include <arm_neon.h>
#include "vuzpf32.x"
-/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
@@ -6,6 +6,6 @@
#include <arm_neon.h>
#include "vuzps32.x"
-/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
@@ -6,6 +6,6 @@
#include <arm_neon.h>
#include "vuzpu32.x"
-/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
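
A note on why these tests now expect zip1/zip2 rather than uzp1/uzp2: with only two lanes per input, the "unzip" and "zip" permutations coincide (low half {a[0], b[0]}, high half {a[1], b[1]}), so once the intrinsics are expressed as generic shuffles the compiler is free to canonicalise the .2s cases on zip. A small sketch of the equivalence, assuming a test body along the lines of vuzps32.x:

#include <arm_neon.h>

int32x2x2_t
test_vuzp_s32 (int32x2_t a, int32x2_t b)
{
  /* vuzp1_s32 yields {a[0], b[0]} and vuzp2_s32 yields {a[1], b[1]},
     which are exactly the zip1/zip2 results for 2-lane vectors, hence
     the scan-assembler patterns above now look for zip1/zip2.  */
  return vuzp_s32 (a, b);
}
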