Commit 3dfa8071 by Kyrylo Tkachov Committed by Kyrylo Tkachov

[AArch64][2/2] (Re)Implement vcopy<q>_lane<q> intrinsics

2016-06-30  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/arm_neon.h (vcopyq_lane_f32, vcopyq_lane_f64,
	vcopyq_lane_p8, vcopyq_lane_p16, vcopyq_lane_s8, vcopyq_lane_s16,
	vcopyq_lane_s32, vcopyq_lane_s64, vcopyq_lane_u8, vcopyq_lane_u16,
	vcopyq_lane_u32, vcopyq_lane_u64): Reimplement in C.
	(vcopy_lane_f32, vcopy_lane_f64, vcopy_lane_p8, vcopy_lane_p16,
	vcopy_lane_s8, vcopy_lane_s16, vcopy_lane_s32, vcopy_lane_s64,
	vcopy_lane_u8, vcopy_lane_u16, vcopy_lane_u32, vcopy_lane_u64,
	vcopy_laneq_f32, vcopy_laneq_f64, vcopy_laneq_p8, vcopy_laneq_p16,
	vcopy_laneq_s8, vcopy_laneq_s16, vcopy_laneq_s32, vcopy_laneq_s64,
	vcopy_laneq_u8, vcopy_laneq_u16, vcopy_laneq_u32, vcopy_laneq_u64,
	vcopyq_laneq_f32, vcopyq_laneq_f64, vcopyq_laneq_p8, vcopyq_laneq_p16,
	vcopyq_laneq_s8, vcopyq_laneq_s16, vcopyq_laneq_s32, vcopyq_laneq_s64,
	vcopyq_laneq_u8, vcopyq_laneq_u16, vcopyq_laneq_u32, vcopyq_laneq_u64):
	New intrinsics.

	* gcc.target/aarch64/vect_copy_lane_1.c: New test.


Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com>

From-SVN: r237883
parent 9bd62242
2016-06-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/arm_neon.h (vcopyq_lane_f32, vcopyq_lane_f64,
vcopyq_lane_p8, vcopyq_lane_p16, vcopyq_lane_s8, vcopyq_lane_s16,
vcopyq_lane_s32, vcopyq_lane_s64, vcopyq_lane_u8, vcopyq_lane_u16,
vcopyq_lane_u32, vcopyq_lane_u64): Reimplement in C.
(vcopy_lane_f32, vcopy_lane_f64, vcopy_lane_p8, vcopy_lane_p16,
vcopy_lane_s8, vcopy_lane_s16, vcopy_lane_s32, vcopy_lane_s64,
vcopy_lane_u8, vcopy_lane_u16, vcopy_lane_u32, vcopy_lane_u64,
vcopy_laneq_f32, vcopy_laneq_f64, vcopy_laneq_p8, vcopy_laneq_p16,
vcopy_laneq_s8, vcopy_laneq_s16, vcopy_laneq_s32, vcopy_laneq_s64,
vcopy_laneq_u8, vcopy_laneq_u16, vcopy_laneq_u32, vcopy_laneq_u64,
vcopyq_laneq_f32, vcopyq_laneq_f64, vcopyq_laneq_p8, vcopyq_laneq_p16,
vcopyq_laneq_s8, vcopyq_laneq_s16, vcopyq_laneq_s32, vcopyq_laneq_s64,
vcopyq_laneq_u8, vcopyq_laneq_u16, vcopyq_laneq_u32, vcopyq_laneq_u64):
New intrinsics.
2016-06-30 James Greenhalgh <james.greenhalgh@arm.com>
Kyrylo Tkachov <kyrylo.tkachov@arm.com>
......
......@@ -5814,162 +5814,6 @@ vaddlvq_u32 (uint32x4_t a)
return result;
}
/* Legacy inline-asm implementations of the vcopyq_lane_<type> intrinsics.
   Each macro emits an AArch64 "ins" (insert vector element) instruction
   that copies element `d` of vector `c` into element `b` of vector `a`
   and evaluates to the updated copy of `a`.  The "0"(a_) constraint ties
   the output register to `a`, and the "i" constraints require `b` and
   `d` to be integer constant expressions.
   NOTE(review): this hunk is the *removed* side of the diff — these
   macros are superseded by the C implementations of the same names that
   appear later in this change.  */
#define vcopyq_lane_f32(a, b, c, d) \
__extension__ \
({ \
float32x4_t c_ = (c); \
float32x4_t a_ = (a); \
float32x4_t result; \
__asm__ ("ins %0.s[%2], %3.s[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_f64(a, b, c, d) \
__extension__ \
({ \
float64x2_t c_ = (c); \
float64x2_t a_ = (a); \
float64x2_t result; \
__asm__ ("ins %0.d[%2], %3.d[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_p8(a, b, c, d) \
__extension__ \
({ \
poly8x16_t c_ = (c); \
poly8x16_t a_ = (a); \
poly8x16_t result; \
__asm__ ("ins %0.b[%2], %3.b[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_p16(a, b, c, d) \
__extension__ \
({ \
poly16x8_t c_ = (c); \
poly16x8_t a_ = (a); \
poly16x8_t result; \
__asm__ ("ins %0.h[%2], %3.h[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_s8(a, b, c, d) \
__extension__ \
({ \
int8x16_t c_ = (c); \
int8x16_t a_ = (a); \
int8x16_t result; \
__asm__ ("ins %0.b[%2], %3.b[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_s16(a, b, c, d) \
__extension__ \
({ \
int16x8_t c_ = (c); \
int16x8_t a_ = (a); \
int16x8_t result; \
__asm__ ("ins %0.h[%2], %3.h[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_s32(a, b, c, d) \
__extension__ \
({ \
int32x4_t c_ = (c); \
int32x4_t a_ = (a); \
int32x4_t result; \
__asm__ ("ins %0.s[%2], %3.s[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_s64(a, b, c, d) \
__extension__ \
({ \
int64x2_t c_ = (c); \
int64x2_t a_ = (a); \
int64x2_t result; \
__asm__ ("ins %0.d[%2], %3.d[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_u8(a, b, c, d) \
__extension__ \
({ \
uint8x16_t c_ = (c); \
uint8x16_t a_ = (a); \
uint8x16_t result; \
__asm__ ("ins %0.b[%2], %3.b[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_u16(a, b, c, d) \
__extension__ \
({ \
uint16x8_t c_ = (c); \
uint16x8_t a_ = (a); \
uint16x8_t result; \
__asm__ ("ins %0.h[%2], %3.h[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_u32(a, b, c, d) \
__extension__ \
({ \
uint32x4_t c_ = (c); \
uint32x4_t a_ = (a); \
uint32x4_t result; \
__asm__ ("ins %0.s[%2], %3.s[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
#define vcopyq_lane_u64(a, b, c, d) \
__extension__ \
({ \
uint64x2_t c_ = (c); \
uint64x2_t a_ = (a); \
uint64x2_t result; \
__asm__ ("ins %0.d[%2], %3.d[%4]" \
: "=w"(result) \
: "0"(a_), "i"(b), "w"(c_), "i"(d) \
: /* No clobbers */); \
result; \
})
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvtx_f32_f64 (float64x2_t a)
{
......@@ -12356,6 +12200,398 @@ vcntq_u8 (uint8x16_t __a)
return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
}
/* vcopy_lane_<type> (__a, __lane1, __b, __lane2):
   return a copy of 64-bit vector __a in which element __lane1 has been
   replaced by element __lane2 of 64-bit vector __b.  Both source and
   destination are D-register (64-bit) vectors.  Implemented on top of
   the generic __aarch64_vget_lane_any / __aarch64_vset_lane_any helper
   macros defined elsewhere in this header.  Lane arguments are expected
   to be integer constant expressions.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcopy_lane_f32 (float32x2_t __a, const int __lane1,
float32x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcopy_lane_f64 (float64x1_t __a, const int __lane1,
float64x1_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcopy_lane_p8 (poly8x8_t __a, const int __lane1,
poly8x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vcopy_lane_p16 (poly16x4_t __a, const int __lane1,
poly16x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcopy_lane_s8 (int8x8_t __a, const int __lane1,
int8x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcopy_lane_s16 (int16x4_t __a, const int __lane1,
int16x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcopy_lane_s32 (int32x2_t __a, const int __lane1,
int32x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcopy_lane_s64 (int64x1_t __a, const int __lane1,
int64x1_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcopy_lane_u8 (uint8x8_t __a, const int __lane1,
uint8x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcopy_lane_u16 (uint16x4_t __a, const int __lane1,
uint16x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcopy_lane_u32 (uint32x2_t __a, const int __lane1,
uint32x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcopy_lane_u64 (uint64x1_t __a, const int __lane1,
uint64x1_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
/* vcopy_laneq_<type> (__a, __lane1, __b, __lane2):
   return a copy of 64-bit vector __a in which element __lane1 has been
   replaced by element __lane2 of 128-bit (Q-register) vector __b.
   "laneq" = the *source* of the copied lane is a quad vector; the
   result keeps the narrow type of __a.  Built on the generic
   __aarch64_vget_lane_any / __aarch64_vset_lane_any helpers.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcopy_laneq_f32 (float32x2_t __a, const int __lane1,
float32x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcopy_laneq_f64 (float64x1_t __a, const int __lane1,
float64x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcopy_laneq_p8 (poly8x8_t __a, const int __lane1,
poly8x16_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vcopy_laneq_p16 (poly16x4_t __a, const int __lane1,
poly16x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcopy_laneq_s8 (int8x8_t __a, const int __lane1,
int8x16_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcopy_laneq_s16 (int16x4_t __a, const int __lane1,
int16x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcopy_laneq_s32 (int32x2_t __a, const int __lane1,
int32x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcopy_laneq_s64 (int64x1_t __a, const int __lane1,
int64x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcopy_laneq_u8 (uint8x8_t __a, const int __lane1,
uint8x16_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcopy_laneq_u16 (uint16x4_t __a, const int __lane1,
uint16x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcopy_laneq_u32 (uint32x2_t __a, const int __lane1,
uint32x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcopy_laneq_u64 (uint64x1_t __a, const int __lane1,
uint64x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
/* vcopyq_lane_<type> (__a, __lane1, __b, __lane2):
   return a copy of 128-bit (Q-register) vector __a in which element
   __lane1 has been replaced by element __lane2 of 64-bit vector __b.
   These C definitions replace the earlier inline-asm macros of the
   same names; they use the generic __aarch64_vget_lane_any /
   __aarch64_vset_lane_any helpers instead of hard-coded "ins" asm.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcopyq_lane_f32 (float32x4_t __a, const int __lane1,
float32x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcopyq_lane_f64 (float64x2_t __a, const int __lane1,
float64x1_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcopyq_lane_p8 (poly8x16_t __a, const int __lane1,
poly8x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vcopyq_lane_p16 (poly16x8_t __a, const int __lane1,
poly16x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcopyq_lane_s8 (int8x16_t __a, const int __lane1,
int8x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcopyq_lane_s16 (int16x8_t __a, const int __lane1,
int16x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcopyq_lane_s32 (int32x4_t __a, const int __lane1,
int32x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcopyq_lane_s64 (int64x2_t __a, const int __lane1,
int64x1_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcopyq_lane_u8 (uint8x16_t __a, const int __lane1,
uint8x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcopyq_lane_u16 (uint16x8_t __a, const int __lane1,
uint16x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcopyq_lane_u32 (uint32x4_t __a, const int __lane1,
uint32x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcopyq_lane_u64 (uint64x2_t __a, const int __lane1,
uint64x1_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
/* vcopyq_laneq_<type> (__a, __lane1, __b, __lane2):
   return a copy of 128-bit (Q-register) vector __a in which element
   __lane1 has been replaced by element __lane2 of 128-bit vector __b.
   Both operands are quad vectors; this is the Q-to-Q variant of the
   vcopy family, built on the generic __aarch64_vget_lane_any /
   __aarch64_vset_lane_any helpers.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcopyq_laneq_f32 (float32x4_t __a, const int __lane1,
float32x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcopyq_laneq_f64 (float64x2_t __a, const int __lane1,
float64x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcopyq_laneq_p8 (poly8x16_t __a, const int __lane1,
poly8x16_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vcopyq_laneq_p16 (poly16x8_t __a, const int __lane1,
poly16x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcopyq_laneq_s8 (int8x16_t __a, const int __lane1,
int8x16_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vcopyq_laneq_s16 (int16x8_t __a, const int __lane1,
int16x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcopyq_laneq_s32 (int32x4_t __a, const int __lane1,
int32x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcopyq_laneq_s64 (int64x2_t __a, const int __lane1,
int64x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcopyq_laneq_u8 (uint8x16_t __a, const int __lane1,
uint8x16_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcopyq_laneq_u16 (uint16x8_t __a, const int __lane1,
uint16x8_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcopyq_laneq_u32 (uint32x4_t __a, const int __lane1,
uint32x4_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcopyq_laneq_u64 (uint64x2_t __a, const int __lane1,
uint64x2_t __b, const int __lane2)
{
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
__a, __lane1);
}
/* vcvt (double -> float). */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
......
2016-06-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vect_copy_lane_1.c: New test.
2016-06-30 James Greenhalgh <james.greenhalgh@arm.com>
Kyrylo Tkachov <kyrylo.tkachov@arm.com>
......
/* { dg-do compile } */
/* { dg-options "-O3" } */
/* gcc.target/aarch64/vect_copy_lane_1.c: compile-only test that each
   vcopy<Q1>_lane<Q2>_<type> intrinsic folds to a single expected
   instruction.  Every BUILD_TEST instantiation is paired with a
   scan-assembler-times directive that counts the instruction emitted
   for the three variants (poly/signed/unsigned, or float with the two
   integer variants) sharing the same lane indices.  */
#include "arm_neon.h"
/* Emit one noinline/noclone wrapper per intrinsic so its code gen can
   be scanned in isolation.  TYPE1/Q1 describe the destination vector,
   TYPE2/Q2 the source; INDEX1/INDEX2 are the constant lane numbers.  */
#define BUILD_TEST(TYPE1, TYPE2, Q1, Q2, SUFFIX, INDEX1, INDEX2) \
TYPE1 __attribute__((noinline,noclone)) \
test_copy##Q1##_lane##Q2##_##SUFFIX (TYPE1 a, TYPE2 b) \
{ \
return vcopy##Q1##_lane##Q2##_##SUFFIX (a, INDEX1, b, INDEX2); \
}
/* vcopy_lane. */
BUILD_TEST (poly8x8_t, poly8x8_t, , , p8, 7, 6)
BUILD_TEST (int8x8_t, int8x8_t, , , s8, 7, 6)
BUILD_TEST (uint8x8_t, uint8x8_t, , , u8, 7, 6)
/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[6\\\]" 3 } } */
BUILD_TEST (poly16x4_t, poly16x4_t, , , p16, 3, 2)
BUILD_TEST (int16x4_t, int16x4_t, , , s16, 3, 2)
BUILD_TEST (uint16x4_t, uint16x4_t, , , u16, 3, 2)
/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[3\\\], v1.h\\\[2\\\]" 3 } } */
BUILD_TEST (float32x2_t, float32x2_t, , , f32, 1, 0)
BUILD_TEST (int32x2_t, int32x2_t, , , s32, 1, 0)
BUILD_TEST (uint32x2_t, uint32x2_t, , , u32, 1, 0)
/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[1\\\], v1.s\\\[0\\\]" 3 } } */
/* The x1 (single-element) variants reduce to a plain register move.  */
BUILD_TEST (int64x1_t, int64x1_t, , , s64, 0, 0)
BUILD_TEST (uint64x1_t, uint64x1_t, , , u64, 0, 0)
BUILD_TEST (float64x1_t, float64x1_t, , , f64, 0, 0)
/* { dg-final { scan-assembler-times "fmov\\td0, d1" 3 } } */
/* vcopy_laneq. */
BUILD_TEST (poly8x8_t, poly8x16_t, , q, p8, 7, 15)
BUILD_TEST (int8x8_t, int8x16_t, , q, s8, 7, 15)
BUILD_TEST (uint8x8_t, uint8x16_t, , q, u8, 7, 15)
/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[15\\\]" 3 } } */
BUILD_TEST (poly16x4_t, poly16x8_t, , q, p16, 3, 7)
BUILD_TEST (int16x4_t, int16x8_t, , q, s16, 3, 7)
BUILD_TEST (uint16x4_t, uint16x8_t, , q, u16, 3, 7)
/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[3\\\], v1.h\\\[7\\\]" 3 } } */
BUILD_TEST (float32x2_t, float32x4_t, , q, f32, 1, 3)
BUILD_TEST (int32x2_t, int32x4_t, , q, s32, 1, 3)
BUILD_TEST (uint32x2_t, uint32x4_t, , q, u32, 1, 3)
/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[1\\\], v1.s\\\[3\\\]" 3 } } */
BUILD_TEST (float64x1_t, float64x2_t, , q, f64, 0, 1)
BUILD_TEST (int64x1_t, int64x2_t, , q, s64, 0, 1)
BUILD_TEST (uint64x1_t, uint64x2_t, , q, u64, 0, 1)
/* XFAIL due to PR 71307. */
/* { dg-final { scan-assembler-times "dup\\td0, v1.d\\\[1\\\]" 3 { xfail *-*-* } } } */
/* vcopyq_lane. */
BUILD_TEST (poly8x16_t, poly8x8_t, q, , p8, 15, 7)
BUILD_TEST (int8x16_t, int8x8_t, q, , s8, 15, 7)
BUILD_TEST (uint8x16_t, uint8x8_t, q, , u8, 15, 7)
/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[15\\\], v1.b\\\[7\\\]" 3 } } */
BUILD_TEST (poly16x8_t, poly16x4_t, q, , p16, 7, 3)
BUILD_TEST (int16x8_t, int16x4_t, q, , s16, 7, 3)
BUILD_TEST (uint16x8_t, uint16x4_t, q, , u16, 7, 3)
/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[7\\\], v1.h\\\[3\\\]" 3 } } */
BUILD_TEST (float32x4_t, float32x2_t, q, , f32, 3, 1)
BUILD_TEST (int32x4_t, int32x2_t, q, , s32, 3, 1)
BUILD_TEST (uint32x4_t, uint32x2_t, q, , u32, 3, 1)
/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[3\\\], v1.s\\\[1\\\]" 3 } } */
BUILD_TEST (float64x2_t, float64x1_t, q, , f64, 1, 0)
BUILD_TEST (int64x2_t, int64x1_t, q, , s64, 1, 0)
BUILD_TEST (uint64x2_t, uint64x1_t, q, , u64, 1, 0)
/* { dg-final { scan-assembler-times "ins\\tv0.d\\\[1\\\], v1.d\\\[0\\\]" 3 } } */
/* vcopyq_laneq. */
BUILD_TEST (poly8x16_t, poly8x16_t, q, q, p8, 14, 15)
BUILD_TEST (int8x16_t, int8x16_t, q, q, s8, 14, 15)
BUILD_TEST (uint8x16_t, uint8x16_t, q, q, u8, 14, 15)
/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[14\\\], v1.b\\\[15\\\]" 3 } } */
BUILD_TEST (poly16x8_t, poly16x8_t, q, q, p16, 6, 7)
BUILD_TEST (int16x8_t, int16x8_t, q, q, s16, 6, 7)
BUILD_TEST (uint16x8_t, uint16x8_t, q, q, u16, 6, 7)
/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[6\\\], v1.h\\\[7\\\]" 3 } } */
BUILD_TEST (float32x4_t, float32x4_t, q, q, f32, 2, 3)
BUILD_TEST (int32x4_t, int32x4_t, q, q, s32, 2, 3)
BUILD_TEST (uint32x4_t, uint32x4_t, q, q, u32, 2, 3)
/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[2\\\], v1.s\\\[3\\\]" 3 } } */
BUILD_TEST (float64x2_t, float64x2_t, q, q, f64, 1, 1)
BUILD_TEST (int64x2_t, int64x2_t, q, q, s64, 1, 1)
BUILD_TEST (uint64x2_t, uint64x2_t, q, q, u64, 1, 1)
/* { dg-final { scan-assembler-times "ins\\tv0.d\\\[1\\\], v1.d\\\[1\\\]" 3 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment