Commit 8c2f568c by Richard Sandiford Committed by Richard Sandiford

Avoid is_constant calls in vectorizable_bswap

The "new" VEC_PERM_EXPR handling makes it easy to support bswap
for variable-length vectors.

2018-08-24  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-stmts.c (vectorizable_bswap): Handle variable-length
	vectors.

gcc/testsuite/
	* gcc.target/aarch64/sve/bswap_1.c: New test.
	* gcc.target/aarch64/sve/bswap_2.c: Likewise.
	* gcc.target/aarch64/sve/bswap_3.c: Likewise.

From-SVN: r263833
parent ab7e60ce
 2018-08-24  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* tree-vect-stmts.c (vectorizable_bswap): Handle variable-length
+	vectors.
+
+2018-08-24  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* tree-vect-slp.c (vect_transform_slp_perm_load): Separate out
 	the case in which the permute needs only a single element and
 	repeats for every vector of the result.  Extend that case to
......
 2018-08-24  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* gcc.target/aarch64/sve/bswap_1.c: New test.
+	* gcc.target/aarch64/sve/bswap_2.c: Likewise.
+	* gcc.target/aarch64/sve/bswap_3.c: Likewise.
+
+2018-08-24  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* gcc.target/aarch64/sve/slp_perm_1.c: New test.
 	* gcc.target/aarch64/sve/slp_perm_2.c: Likewise.
 	* gcc.target/aarch64/sve/slp_perm_3.c: Likewise.
......
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
/* Write the byte-swapped form of each of the first 100 halfwords of B
   into the matching slot of A.  */
void
f (uint16_t *a, uint16_t *b)
{
  enum { ELEMENT_COUNT = 100 };

  for (int idx = 0; idx != ELEMENT_COUNT; ++idx)
    {
      a[idx] = __builtin_bswap16 (b[idx]);
    }
}
/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 { xfail aarch64_big_endian } } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
/* Write the byte-swapped form of each of the first 100 words of B
   into the matching slot of A.  */
void
f (uint32_t *a, uint32_t *b)
{
  int pos = 0;

  while (pos < 100)
    {
      a[pos] = __builtin_bswap32 (b[pos]);
      ++pos;
    }
}
/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 { xfail aarch64_big_endian } } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
/* Write the byte-swapped form of each of the first 100 doublewords of B
   into the matching slot of A.  */
void
f (uint64_t *a, uint64_t *b)
{
  for (int k = 0; k < 100; k++)
    {
      uint64_t swapped = __builtin_bswap64 (b[k]);
      a[k] = swapped;
    }
}
/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 { xfail aarch64_big_endian } } } */
@@ -2961,13 +2961,10 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   vec_info *vinfo = stmt_info->vinfo;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   unsigned ncopies;
-  unsigned HOST_WIDE_INT nunits, num_bytes;
 
   op = gimple_call_arg (stmt, 0);
   vectype = STMT_VINFO_VECTYPE (stmt_info);
-
-  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
-    return false;
+  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
@@ -2983,11 +2980,11 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (! char_vectype)
     return false;
 
-  if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
+  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
+  unsigned word_bytes;
+  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
     return false;
 
-  unsigned word_bytes = num_bytes / nunits;
-
   /* The encoding uses one stepped pattern for each byte in the word.  */
   vec_perm_builder elts (num_bytes, word_bytes, 3);
   for (unsigned i = 0; i < 3; ++i)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment