Commit cf465d71 by Alan Lawrence Committed by Alan Lawrence

[PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests.

gcc/:

	* config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern.
	* config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.
	* config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8):
	Replace temporary asm with call to builtin.
	(vrbit_p8, vrbitq_p8): New functions.

gcc/testsuite/:

	* gcc.target/aarch64/simd/vrbit_1.c: New test.

From-SVN: r214943
parent 08c13199
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern.
* config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.
* config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8):
Replace temporary asm with call to builtin.
(vrbit_p8, vrbitq_p8): New functions.
2014-09-05 Richard Biener <rguenther@suse.de>
* cfgloop.c (mark_loop_for_removal): New function.
......@@ -344,6 +344,8 @@
VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di)
BUILTIN_VB (UNOP, rbit, 0)
/* Implemented by
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
BUILTIN_VALL (BINOP, zip1, 0)
......
......@@ -294,6 +294,15 @@
[(set_attr "type" "neon_rev<q>")]
)
(define_insn "aarch64_rbit<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
UNSPEC_RBIT))]
"TARGET_SIMD"
"rbit\\t%0.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_rbit")]
)
(define_insn "*aarch64_mul3_elt<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL
......
......@@ -10407,50 +10407,6 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
result; \
})
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t a)
{
int8x8_t result;
__asm__ ("rbit %0.8b,%1.8b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t a)
{
uint8x8_t result;
__asm__ ("rbit %0.8b,%1.8b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t a)
{
int8x16_t result;
__asm__ ("rbit %0.16b,%1.16b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t a)
{
uint8x16_t result;
__asm__ ("rbit %0.16b,%1.16b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a)
{
......@@ -20781,6 +20737,44 @@ vqsubd_u64 (uint64_t __a, uint64_t __b)
return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}
/* vrbit */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrbit_p8 (poly8x8_t __a)
{
return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t __a)
{
return __builtin_aarch64_rbitv8qi (__a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t __a)
{
return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrbitq_p8 (poly8x16_t __a)
{
return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t __a)
{
return __builtin_aarch64_rbitv16qi (__a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t __a)
{
return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
}
/* vrecpe */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
......
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
* gcc.target/aarch64/simd/vrbit_1.c: New test.
2014-09-05 Richard Biener <rguenther@suse.de>
PR middle-end/63148
......
/* { dg-do run } */
/* { dg-options "-O2 --save-temps -fno-inline" } */
#include <arm_neon.h>
extern void abort (void);
uint64_t in1 = 0x0123456789abcdefULL;
uint64_t expected1 = 0x80c4a2e691d5b3f7ULL;
#define TEST8(BASETYPE, SUFFIX) \
void test8_##SUFFIX () \
{ \
BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1)); \
uint64_t res = vget_lane_u64 (vreinterpret_u64_##SUFFIX (out), 0); \
if (res != expected1) abort (); \
}
uint64_t in2 = 0xdeadbeefcafebabeULL;
uint64_t expected2 = 0x7bb57df7537f5d7dULL;
#define TEST16(BASETYPE, SUFFIX) \
void test16_##SUFFIX () \
{ \
BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1), \
vcreate_##SUFFIX (in2)); \
uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in)); \
uint64_t res1 = vgetq_lane_u64 (res, 0); \
uint64_t res2 = vgetq_lane_u64 (res, 1); \
if (res1 != expected1 || res2 != expected2) abort (); \
}
TEST8 (poly, p8);
TEST8 (int, s8);
TEST8 (uint, u8);
TEST16 (poly, p8);
TEST16 (int, s8);
TEST16 (uint, u8);
int
main (int argc, char **argv)
{
test8_p8 ();
test8_s8 ();
test8_u8 ();
test16_p8 ();
test16_s8 ();
test16_u8 ();
return 0;
}
/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */
/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment