Commit 5e32e83b, authored and committed by Jiong Wang

[AArch64] Add vector pattern for __builtin_ctz

  gcc/
    * config/aarch64/iterators.md (VS): New mode iterator.
    (vsi2qi): New mode attribute.
    (VSI2QI): Likewise.
    * config/aarch64/aarch64-simd-builtins.def: New entry for ctz.
    * config/aarch64/aarch64-simd.md (ctz<mode>2): New pattern for ctz.
    * config/aarch64/aarch64-builtins.c
    (aarch64_builtin_vectorized_function): Support BUILT_IN_CTZ.

  gcc/testsuite/
    * gcc.target/aarch64/vect_ctz_1.c: New testcase.

From-SVN: r217938
parent a699d672
2014-11-21 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/iterators.md (VS): New mode iterator.
(vsi2qi): New mode attribute.
(VSI2QI): Likewise.
* config/aarch64/aarch64-simd-builtins.def: New entry for ctz.
* config/aarch64/aarch64-simd.md (ctz<mode>2): New pattern for ctz.
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Support BUILT_IN_CTZ.
2014-11-21 H.J. Lu <hongjiu.lu@intel.com>
PR bootstrap/63784
...@@ -1199,6 +1199,14 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) ...@@ -1199,6 +1199,14 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
return NULL_TREE; return NULL_TREE;
} }
/* __builtin_ctz: return the V2SI ctz builtin decl when
AARCH64_CHECK_BUILTIN_MODE (2, S) holds, the V4SI one when
AARCH64_CHECK_BUILTIN_MODE (4, S) holds, and NULL_TREE otherwise.
NOTE(review): the macro definition in force at this point is outside
this excerpt -- confirm exactly which modes and lane counts it tests.  */
case BUILT_IN_CTZ:
{
if (AARCH64_CHECK_BUILTIN_MODE (2, S))
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
return NULL_TREE;
}
#undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##Imode && out_n == C \ (out_mode == N##Imode && out_n == C \
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
BUILTIN_VD_BHSI (BINOP, addp, 0) BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di) VAR1 (UNOP, addp, 0, di)
BUILTIN_VDQ_BHSI (UNOP, clz, 2) BUILTIN_VDQ_BHSI (UNOP, clz, 2)
BUILTIN_VS (UNOP, ctz, 2)
/* be_checked_get_lane does its own lane swapping, so not a lane index. */ /* be_checked_get_lane does its own lane swapping, so not a lane index. */
BUILTIN_VALL (GETREG, be_checked_get_lane, 0) BUILTIN_VALL (GETREG, be_checked_get_lane, 0)
......
...@@ -303,6 +303,20 @@ ...@@ -303,6 +303,20 @@
[(set_attr "type" "neon_rbit")] [(set_attr "type" "neon_rbit")]
) )
(define_expand "ctz<mode>2"
[(set (match_operand:VS 0 "register_operand")
(ctz:VS (match_operand:VS 1 "register_operand")))]
"TARGET_SIMD"
{
/* Expand vector ctz as three emitted insns: bswap, rbit, clz.
The intent is that the byte swap of each SI element followed by the
bit reversal of each byte reverses all the bits of each element, so
counting leading zeros of that result gives the trailing-zero count
of the input.  operands[0] doubles as the scratch register for the
intermediate values.  */
emit_insn (gen_bswap<mode> (operands[0], operands[1]));
/* View the destination as the QI vector mode given by VSI2QI so the
rbit pattern, which is selected by the vsi2qi attribute, can be
applied to it in place.  */
rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
<MODE>mode, 0);
emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
DONE;
}
)
(define_insn "*aarch64_mul3_elt<mode>" (define_insn "*aarch64_mul3_elt<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w") [(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL (mult:VMUL
......
...@@ -180,6 +180,9 @@ ...@@ -180,6 +180,9 @@
;; All byte modes. ;; All byte modes.
(define_mode_iterator VB [V8QI V16QI]) (define_mode_iterator VB [V8QI V16QI])
;; 2 and 4 lane SI modes.
(define_mode_iterator VS [V2SI V4SI])
(define_mode_iterator TX [TI TF]) (define_mode_iterator TX [TI TF])
;; Opaque structure modes. ;; Opaque structure modes.
...@@ -667,6 +670,9 @@ ...@@ -667,6 +670,9 @@
(V2DI "p") (V2DF "p") (V2DI "p") (V2DF "p")
(V2SF "p") (V4SF "v")]) (V2SF "p") (V4SF "v")])
;; Map an SI vector mode to a QI vector mode with four times as many
;; lanes (V2SI -> V8QI, V4SI -> V16QI), in lower case (vsi2qi) and
;; upper case (VSI2QI).
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
;; Code Iterators ;; Code Iterators
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
......
2014-11-21 Jiong Wang <jiong.wang@arm.com>
* gcc.target/aarch64/vect_ctz_1.c: New testcase.
2014-11-21 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/simd/vsqrt_f64_1.c
......
/* { dg-do run } */
/* { dg-options "-O3 -save-temps -fno-inline" } */
extern void abort ();
/* Define count_tz_<name> (a, b), which stores __builtin_<subname> (a[i])
into b[i] for every i in [0, count).  Both pointers are __restrict
qualified.  */
#define TEST(name, subname, count) \
void \
count_tz_##name (unsigned *__restrict a, int *__restrict b) \
{ \
int i; \
for (i = 0; i < count; i++) \
b[i] = __builtin_##subname (a[i]); \
}
/* Call count_tz_<name> (input, output), then abort if any of the first
COUNT entries of OUTPUT differs from the matching entry of r.
Expands to two statements and uses the identifiers `i' and `r' from
the expansion site, so it must be used where both are in scope.  */
#define CHECK(name, count, input, output) \
count_tz_##name (input, output); \
for (i = 0; i < count; i++) \
{ \
if (output[i] != r[i]) \
abort (); \
}
/* Instantiate count_tz_v4si (4 elements) and count_tz_v2si (2 elements),
both built on __builtin_ctz.  The two assembler scans that follow look
for a vector clz on a .4s and on a .2s register arrangement.  */
TEST (v4si, ctz, 4)
TEST (v2si, ctz, 2)
/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.2s" } } */
/* Run both generated functions on x4 and compare against r, where r[k]
is the expected result for x4[k].  The 2-element check reuses the
first two entries of x4 and r.
NOTE(review): the first input is 0 with an expected result of 32; the
GCC manual documents __builtin_ctz (0) as undefined, so this
expectation depends on what the target's code sequence produces --
confirm that is intended.  */
int
main ()
{
unsigned int x4[4] = { 0x0, 0xFF80, 0x1FFFF, 0xFF000000 };
int r[4] = { 32, 7, 0, 24 };
int d[4], i;
CHECK (v4si, 4, x4, d);
CHECK (v2si, 2, x4, d);
return 0;
}
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment