Commit 0fe04f5c by Alex Velenko, committed by Marcus Shawcroft

[AArch64] Implement vclz AdvSIMD intrinsic.

From-SVN: r203314
parent bed9bae4
2013-10-09  Alex Velenko  <Alex.Velenko@arm.com>

        * config/aarch64/arm_neon.h (vclz_s8, vclz_s16, vclz_s32)
        (vclzq_s8, vclzq_s16, vclzq_s32, vclz_u8, vclz_u16, vclz_u32)
        (vclzq_u8, vclzq_u16, vclzq_u32): Replace ASM with C.
        * config/aarch64/aarch64.h
        (CLZ_DEFINED_VALUE_AT_ZERO): Macro fixed for clz.
        * config/aarch64/aarch64-simd-builtins.def
        (VAR1 (UNOP, clz, 0, v4si)): Replaced with iterator.

2013-10-09  Alex Velenko  <Alex.Velenko@arm.com>

        * config/aarch64/arm_neon.h (vadd_f64, vsub_f64): Implementation added.

2013-10-09  Alex Velenko  <Alex.Velenko@arm.com>
......
@@ -45,7 +45,7 @@
   BUILTIN_VDQF (UNOP, sqrt, 2)
   BUILTIN_VD_BHSI (BINOP, addp, 0)
   VAR1 (UNOP, addp, 0, di)
-  VAR1 (UNOP, clz, 2, v4si)
+  BUILTIN_VDQ_BHSI (UNOP, clz, 2)
   BUILTIN_VALL (GETLANE, get_lane, 0)
   VAR1 (GETLANE, get_lane, 0, di)
......
@@ -739,7 +739,7 @@ do { \
     : reverse_condition (CODE))
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-  ((VALUE) = ((MODE) == SImode ? 32 : 64), 2)
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE))
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = ((MODE) == SImode ? 32 : 64), 2)
......
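For reference (not part of the commit): on AArch64 the CLZ instruction returns the full element width for a zero input, and GET_MODE_UNIT_BITSIZE (MODE) is exactly that per-lane width for the vector mode, e.g. 32 for V4SI. A minimal sketch exercising the zero-input case through the intrinsics added below:

/* Illustration only, not part of the commit.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int32x4_t zero = vdupq_n_s32 (0);
  int32x4_t lz = vclzq_s32 (zero);   /* Each lane: clz (0) == 32.  */

  printf ("%d %d %d %d\n",
          vgetq_lane_s32 (lz, 0), vgetq_lane_s32 (lz, 1),
          vgetq_lane_s32 (lz, 2), vgetq_lane_s32 (lz, 3));
  return 0;
}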
@@ -5158,138 +5158,6 @@ vclsq_s32 (int32x4_t a)
return result;
}
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t a)
{
int8x8_t result;
__asm__ ("clz %0.8b,%1.8b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t a)
{
int16x4_t result;
__asm__ ("clz %0.4h,%1.4h"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t a)
{
int32x2_t result;
__asm__ ("clz %0.2s,%1.2s"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t a)
{
uint8x8_t result;
__asm__ ("clz %0.8b,%1.8b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t a)
{
uint16x4_t result;
__asm__ ("clz %0.4h,%1.4h"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t a)
{
uint32x2_t result;
__asm__ ("clz %0.2s,%1.2s"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t a)
{
int8x16_t result;
__asm__ ("clz %0.16b,%1.16b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t a)
{
int16x8_t result;
__asm__ ("clz %0.8h,%1.8h"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t a)
{
int32x4_t result;
__asm__ ("clz %0.4s,%1.4s"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t a)
{
uint8x16_t result;
__asm__ ("clz %0.16b,%1.16b"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t a)
{
uint16x8_t result;
__asm__ ("clz %0.8h,%1.8h"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t a)
{
uint32x4_t result;
__asm__ ("clz %0.4s,%1.4s"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t a)
{
@@ -17934,6 +17802,80 @@ vcltzd_f64 (float64_t __a)
return __a < 0.0 ? -1ll : 0ll;
}
/* vclz. */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t __a)
{
return __builtin_aarch64_clzv8qi (__a);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t __a)
{
return __builtin_aarch64_clzv4hi (__a);
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t __a)
{
return __builtin_aarch64_clzv2si (__a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t __a)
{
return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t __a)
{
return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t __a)
{
return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
}
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t __a)
{
return __builtin_aarch64_clzv16qi (__a);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t __a)
{
return __builtin_aarch64_clzv8hi (__a);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t __a)
{
return __builtin_aarch64_clzv4si (__a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t __a)
{
return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t __a)
{
return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t __a)
{
return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
/* vcvt (double -> float). */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
......
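A short usage sketch (not from the commit) of the new C implementations; since the intrinsics now expand to __builtin_aarch64_clz* calls rather than opaque inline assembly, the compiler can fold and schedule them like any other operation:

/* Usage sketch only, not part of the commit.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8x8_t v = vdup_n_u8 (0x10);   /* 0b00010000: 3 leading zeros.  */
  int32x2_t w = vdup_n_s32 (1);     /* clz (1) == 31 in a 32-bit lane.  */

  printf ("%d %d\n",
          (int) vget_lane_u8 (vclz_u8 (v), 0),   /* Expected: 3.  */
          vget_lane_s32 (vclz_s32 (w), 0));      /* Expected: 31.  */
  return 0;
}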
2013-10-09  Alex Velenko  <Alex.Velenko@arm.com>

        * gcc.target/aarch64/vclz.c: New testcase.

2013-10-09  Alex Velenko  <Alex.Velenko@arm.com>

        * gcc.target/aarch64/vadd_f64.c: New testcase.
        * gcc.target/aarch64/vsub_f64.c: New testcase.
......
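The new test file itself is not shown on this page. A hypothetical sketch of the kind of check gcc.target/aarch64/vclz.c could perform (the function name, dg- directives, and scan pattern here are illustrative, not the committed test):

/* Hypothetical sketch; the committed gcc.target/aarch64/vclz.c is not shown here.  */
/* { dg-do compile } */
/* { dg-options "-O2" } */

#include <arm_neon.h>

int32x4_t
count_leading_zeros (int32x4_t x)
{
  return vclzq_s32 (x);
}

/* The C implementation should still expand to the vector CLZ instruction.  */
/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */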