Commit e09b4c37 by Richard Biener Committed by Richard Biener

tree-vect-stmts.c (vectorizable_load): Remove restrictions on strided SLP loads…

tree-vect-stmts.c (vectorizable_load): Remove restrictions on strided SLP loads and fall back to scalar loads in case we can't chunk them.

2016-06-08  Richard Biener  <rguenther@suse.de>

	* tree-vect-stmts.c (vectorizable_load): Remove restrictions
	on strided SLP loads and fall back to scalar loads in case
	we can't chunk them.

	* gcc.dg/vect/slp-43.c: New testcase.

From-SVN: r237215
parent 72d50660
2016-06-08 Richard Biener <rguenther@suse.de>
* tree-vect-stmts.c (vectorizable_load): Remove restrictions
on strided SLP loads and fall back to scalar loads in case
we can't chunk them.
2016-06-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/71452
* tree-ssa.c (non_rewritable_lvalue_p): Make sure that the
type used for the SSA rewrite has enough precision to cover
......
2016-06-08 Richard Biener <rguenther@suse.de>
* gcc.dg/vect/slp-43.c: New testcase.
2016-06-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/71452
* gcc.dg/torture/pr71452.c: New testcase.
......
/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-O3" } */
#include <string.h>
#include "tree-vect.h"
/* FOO(T,N): define foo_<T>_<N>, which copies 16 groups of N contiguous
   elements of type T from IN to OUT.  After each group the input pointer
   advances by s*N elements while the output advances by N, so for s != 1
   the loads form a strided SLP group of size N -- the pattern whose
   vectorization this testcase exercises.  The assume_aligned calls let
   the vectorizer use aligned accesses regardless of target defaults.  */
#define FOO(T,N) \
void __attribute__((noinline,noclone)) \
foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
{ \
T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
for (int i = 0; i < 16; i++) \
{ \
for (int j = 0; j < N; ++j) \
out[j] = in[j]; \
in += s*N; \
out += N; \
} \
}
/* TEST(T,N): run foo_<T>_<N> with stride 1 and verify both that the
   first 16*N elements of OUT match IN (correct data) and that the rest
   of OUT is still zero from the memset (no out-of-bounds stores from a
   miscompiled vectorized loop).  */
#define TEST(T,N) \
do { \
memset (out, 0, 4096); \
foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
__builtin_abort (); \
for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
if (out[i] != 0) \
__builtin_abort (); \
} while (0)
/* Instantiate copy routines covering group sizes that divide a typical
   vector element count (1, 2, 4, 8, 16), sizes that do not (3, 6), and
   -- for int -- sizes at or above it, for both a narrow (char) and a
   wide (int) element type.  The non-dividing sizes are the cases the
   patch under test newly handles (or falls back to scalar loads for).  */
FOO(char, 1)
FOO(char, 2)
FOO(char, 3)
FOO(char, 4)
FOO(char, 6)
FOO(char, 8)
FOO(int, 1)
FOO(int, 2)
FOO(int, 3)
FOO(int, 4)
FOO(int, 6)
FOO(int, 8)
FOO(int, 16)
/* Shared source/destination buffers, maximally aligned to match the
   __builtin_assume_aligned assertions inside the foo_* routines.  */
char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
/* Driver: initialize the input with a known byte pattern, then run every
   instantiated copy routine and abort on any mismatch (TEST aborts on
   failure; reaching the end means success).  */
int main()
{
check_vect ();  /* From tree-vect.h; presumably bails out early on
		   targets without the required vector ISA -- standard
		   gcc.dg/vect harness behavior.  */
for (int i = 0; i < 4096; ++i)
{
in[i] = i;
/* Memory-clobbering compiler barrier: forces each store to in[i] to
   really happen and keeps this loop from being folded away or
   vectorized itself.  */
__asm__ volatile ("" : : : "memory");
}
TEST(char, 1);
TEST(char, 2);
TEST(char, 3);
TEST(char, 4);
TEST(char, 6);
TEST(char, 8);
TEST(int, 1);
TEST(int, 2);
TEST(int, 3);
TEST(int, 4);
TEST(int, 6);
TEST(int, 8);
TEST(int, 16);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
......@@ -6440,17 +6440,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
}
else if (STMT_VINFO_STRIDED_P (stmt_info))
{
if (grouped_load
&& slp
&& (group_size > nunits
|| nunits % group_size != 0))
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unhandled strided group load\n");
return false;
}
}
;
else
{
negative = tree_int_cst_compare (nested_in_vect_loop
......@@ -6744,16 +6734,29 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
running_off = offvar;
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
int nloads = nunits;
int lnel = 1;
tree ltype = TREE_TYPE (vectype);
auto_vec<tree> dr_chain;
if (slp)
{
nloads = nunits / group_size;
if (group_size < nunits)
ltype = build_vector_type (TREE_TYPE (vectype), group_size);
else
ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
if (group_size < nunits
&& nunits % group_size == 0)
{
nloads = nunits / group_size;
lnel = group_size;
ltype = build_vector_type (TREE_TYPE (vectype), group_size);
ltype = build_aligned_type (ltype,
TYPE_ALIGN (TREE_TYPE (vectype)));
}
else if (group_size >= nunits
&& group_size % nunits == 0)
{
nloads = 1;
lnel = nunits;
ltype = vectype;
ltype = build_aligned_type (ltype,
TYPE_ALIGN (TREE_TYPE (vectype)));
}
/* For SLP permutation support we need to load the whole group,
not only the number of vector stmts the permutation result
fits in. */
......@@ -6765,48 +6768,43 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
else
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
int group_el = 0;
unsigned HOST_WIDE_INT
elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
for (j = 0; j < ncopies; j++)
{
tree vec_inv;
if (nloads > 1)
vec_alloc (v, nloads);
for (i = 0; i < nloads; i++)
{
vec_alloc (v, nloads);
for (i = 0; i < nloads; i++)
tree this_off = build_int_cst (TREE_TYPE (alias_off),
group_el * elsz);
new_stmt = gimple_build_assign (make_ssa_name (ltype),
build2 (MEM_REF, ltype,
running_off, this_off));
vect_finish_stmt_generation (stmt, new_stmt, gsi);
if (nloads > 1)
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
gimple_assign_lhs (new_stmt));
group_el += lnel;
if (! slp
|| group_el == group_size)
{
tree newref, newoff;
gimple *incr;
newref = build2 (MEM_REF, ltype, running_off, alias_off);
newref = force_gimple_operand_gsi (gsi, newref, true,
NULL_TREE, true,
GSI_SAME_STMT);
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
newoff = copy_ssa_name (running_off);
incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, stride_step);
tree newoff = copy_ssa_name (running_off);
gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, stride_step);
vect_finish_stmt_generation (stmt, incr, gsi);
running_off = newoff;
group_el = 0;
}
vec_inv = build_constructor (vectype, v);
new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
else
if (nloads > 1)
{
new_stmt = gimple_build_assign (make_ssa_name (ltype),
build2 (MEM_REF, ltype,
running_off, alias_off));
vect_finish_stmt_generation (stmt, new_stmt, gsi);
tree newoff = copy_ssa_name (running_off);
gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, stride_step);
vect_finish_stmt_generation (stmt, incr, gsi);
running_off = newoff;
tree vec_inv = build_constructor (vectype, v);
new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
if (slp)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment