Commit c7f28cd5 by Kyrylo Tkachov Committed by Kyrylo Tkachov

[AArch64] Vectorise bswap[16,32,64]

	* config/aarch64/aarch64-builtins.c
	(aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16,
	BUILT_IN_BSWAP32, BUILT_IN_BSWAP64.
	* config/aarch64/aarch64-simd.md (bswap<mode>): New pattern.
	* config/aarch64/aarch64-simd-builtins.def: Define vector bswap
	builtins.
	* config/aarch64/iterators.md (VDQHSD): New mode iterator.
	(Vrevsuff): New mode attribute.

	* lib/target-supports.exp (check_effective_target_vect_bswap): New.
	* gcc.dg/vect/vect-bswap16.c: New test.
	* gcc.dg/vect/vect-bswap32.c: Likewise.
	* gcc.dg/vect/vect-bswap64.c: Likewise.

From-SVN: r209736
parent b0419491
2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16,
BUILT_IN_BSWAP32, BUILT_IN_BSWAP64.
* config/aarch64/aarch64-simd.md (bswap<mode>): New pattern.
* config/aarch64/aarch64-simd-builtins.def: Define vector bswap
builtins.
* config/aarch64/iterators.md (VDQHSD): New mode iterator.
(Vrevsuff): New mode attribute.
2014-04-24 Terry Guo <terry.guo@arm.com>
* config/arm/arm.h (machine_function): Define variable
......
......@@ -1104,7 +1104,29 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return aarch64_builtin_decls[builtin];
}
case BUILT_IN_BSWAP16:
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##Imode && out_n == C \
&& in_mode == N##Imode && in_n == C)
if (AARCH64_CHECK_BUILTIN_MODE (4, H))
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
else
return NULL_TREE;
case BUILT_IN_BSWAP32:
if (AARCH64_CHECK_BUILTIN_MODE (2, S))
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
else
return NULL_TREE;
case BUILT_IN_BSWAP64:
if (AARCH64_CHECK_BUILTIN_MODE (2, D))
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
else
return NULL_TREE;
default:
return NULL_TREE;
}
......
......@@ -341,6 +341,8 @@
VAR1 (UNOP, floatunsv4si, 2, v4sf)
VAR1 (UNOP, floatunsv2di, 2, v2df)
/* Unsigned unary vector byte-swap builtins, one for each listed mode
   (v4hi, v8hi, v2si, v4si, v2di).
   NOTE(review): the third argument (10) is presumably the selector that
   maps the builtin onto the bswap<mode> pattern name -- confirm against
   the mapping table documented at the top of this .def file.  */
VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di)
/* Implemented by
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
BUILTIN_VALL (BINOP, zip1, 0)
......
......@@ -286,6 +286,14 @@
[(set_attr "type" "neon_mul_<Vetype><q>")]
)
;; Vector byte swap.  For every mode in the VDQHSD iterator, apply the
;; bswap RTL operation to SIMD register operand 1 and store the result in
;; SIMD register operand 0.  The pattern is only available when
;; TARGET_SIMD holds.  The mnemonic is "rev" followed by the <Vrevsuff>
;; attribute of the mode.
;; NOTE(review): <Vbtype> presumably expands to the byte arrangement of
;; the vector (8b/16b) -- confirm against its definition in the iterators
;; file.
(define_insn "bswap<mode>"
[(set (match_operand:VDQHSD 0 "register_operand" "=w")
(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
"TARGET_SIMD"
"rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_rev<q>")]
)
(define_insn "*aarch64_mul3_elt<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL
......
......@@ -150,6 +150,9 @@
;; Vector modes for H and S types.
(define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI])
;; Vector modes for H, S and D types: the 64-bit and 128-bit vectors of
;; HImode and SImode elements plus V2DI.  There is no 64-bit D-element
;; vector in this list.
(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
;; Vector modes for Q, H and S types.
(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI])
......@@ -356,6 +359,9 @@
(V2DI "2d") (V2SF "2s")
(V4SF "4s") (V2DF "2d")])
;; Numeric suffix appended to "rev" for each vector mode, i.e. the element
;; width in bits: 16 for the HI-element modes, 32 for the SI-element modes
;; and 64 for V2DI.
(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
(V4SI "32") (V2DI "64")])
(define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
(V4HI ".4h") (V8HI ".8h")
(V2SI ".2s") (V4SI ".4s")
......
2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* lib/target-supports.exp (check_effective_target_vect_bswap): New.
* gcc.dg/vect/vect-bswap16.c: New test.
* gcc.dg/vect/vect-bswap32.c: Likewise.
* gcc.dg/vect/vect-bswap64.c: Likewise.
2014-04-23 Jeff Law <law@redhat.com>
PR tree-optimization/60902
......
/* { dg-require-effective-target vect_bswap } */
#include "tree-vect.h"
#define N 128
volatile int y = 0;
/* Byte-swap each of the N 16-bit elements of A in place using
   __builtin_bswap16.  Presumably this is the single loop that the
   dg-final "vectorized 1 loops" scan expects to be vectorised, so its
   shape must not be changed.  */
static inline void
vfoo16 (unsigned short int* a)
{
int i = 0;
for (i = 0; i < N; ++i)
a[i] = __builtin_bswap16 (a[i]);
}
/* Test driver.  Fills ARR with 0..N-1 and EXPECT with the scalar
   __builtin_bswap16 of each index, runs vfoo16 over ARR, then aborts on
   the first element that differs from EXPECT.  Returns 0 on success.  */
int
main (void)
{
unsigned short arr[N];
unsigned short expect[N];
int i;
/* Build the input and the reference results.  The read of the volatile
   Y keeps this set-up loop scalar, so it does not count towards the
   number of vectorised loops.  */
for (i = 0; i < N; ++i)
{
arr[i] = i;
expect[i] = __builtin_bswap16 (i);
if (y) /* Avoid vectorisation. */
abort ();
}
vfoo16 (arr);
/* Compare the in-place result against the reference values.  */
for (i = 0; i < N; ++i)
{
if (arr[i] != expect[i])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_bswap } */
#include "tree-vect.h"
#define N 128
volatile int y = 0;
/* Byte-swap each of the N 32-bit elements of A in place using
   __builtin_bswap32.  Presumably this is the single loop that the
   dg-final "vectorized 1 loops" scan expects to be vectorised, so its
   shape must not be changed.  */
static inline void
vfoo32 (unsigned int* a)
{
int i = 0;
for (i = 0; i < N; ++i)
a[i] = __builtin_bswap32 (a[i]);
}
/* Test driver.  Fills ARR with 0..N-1 and EXPECT with the scalar
   __builtin_bswap32 of each index, runs vfoo32 over ARR, then aborts on
   the first element that differs from EXPECT.  Returns 0 on success.  */
int
main (void)
{
unsigned int arr[N];
unsigned int expect[N];
int i;
/* Build the input and the reference results.  The read of the volatile
   Y keeps this set-up loop scalar, so it does not count towards the
   number of vectorised loops.  */
for (i = 0; i < N; ++i)
{
arr[i] = i;
expect[i] = __builtin_bswap32 (i);
if (y) /* Avoid vectorisation. */
abort ();
}
vfoo32 (arr);
/* Compare the in-place result against the reference values.  */
for (i = 0; i < N; ++i)
{
if (arr[i] != expect[i])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_bswap } */
#include "tree-vect.h"
#define N 128
volatile int y = 0;
/* Byte-swap each of the N 64-bit elements of A in place using
   __builtin_bswap64.  Presumably this is the single loop that the
   dg-final "vectorized 1 loops" scan expects to be vectorised, so its
   shape must not be changed.  */
static inline void
vfoo64 (unsigned long long* a)
{
int i = 0;
for (i = 0; i < N; ++i)
a[i] = __builtin_bswap64 (a[i]);
}
/* Test driver.  Fills ARR with 0..N-1 and EXPECT with the scalar
   __builtin_bswap64 of each index, runs vfoo64 over ARR, then aborts on
   the first element that differs from EXPECT.  Returns 0 on success.  */
int
main (void)
{
unsigned long long arr[N];
unsigned long long expect[N];
int i;
/* Build the input and the reference results.  The read of the volatile
   Y keeps this set-up loop scalar, so it does not count towards the
   number of vectorised loops.  */
for (i = 0; i < N; ++i)
{
arr[i] = i;
expect[i] = __builtin_bswap64 (i);
if (y) /* Avoid vectorisation. */
abort ();
}
vfoo64 (arr);
/* Compare the in-place result against the reference values.  */
for (i = 0; i < N; ++i)
{
if (arr[i] != expect[i])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -3271,6 +3271,24 @@ proc check_effective_target_vect_shift { } {
return $et_vect_shift_saved
}
# Effective-target check: return 1 if the target supports vector bswap
# operations, 0 otherwise.  The answer is computed on the first call and
# kept in the global et_vect_bswap_saved for later calls; only
# aarch64*-*-* targets are reported as supporting it.
proc check_effective_target_vect_bswap { } {
    global et_vect_bswap_saved

    if { ![info exists et_vect_bswap_saved] } {
	# First query: start from "unsupported" and enable per target.
	set et_vect_bswap_saved 0
	if { [istarget aarch64*-*-*] } {
	    set et_vect_bswap_saved 1
	}
    } else {
	verbose "check_effective_target_vect_bswap: using cached result" 2
    }

    verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2
    return $et_vect_bswap_saved
}
# Return 1 if the target supports hardware vector shift operation for char.
proc check_effective_target_vect_shift_char { } {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment