Commit e09b4c37 authored by Richard Biener, committed by Richard Biener

tree-vect-stmts.c (vectorizable_load): Remove restrictions on strided SLP loads and fall back to scalar loads in case we can't chunk them.

2016-06-08  Richard Biener  <rguenther@suse.de>

	* tree-vect-stmts.c (vectorizable_load): Remove restrictions
	on strided SLP loads and fall back to scalar loads in case
	we can't chunk them.

	* gcc.dg/vect/slp-43.c: New testcase.

From-SVN: r237215
parent 72d50660
gcc/ChangeLog

2016-06-08  Richard Biener  <rguenther@suse.de>

        * tree-vect-stmts.c (vectorizable_load): Remove restrictions
        on strided SLP loads and fall back to scalar loads in case
        we can't chunk them.

2016-06-08  Richard Biener  <rguenther@suse.de>

        PR tree-optimization/71452
        * tree-ssa.c (non_rewritable_lvalue_p): Make sure that the
        type used for the SSA rewrite has enough precision to cover
        ...

gcc/testsuite/ChangeLog

2016-06-08  Richard Biener  <rguenther@suse.de>

        * gcc.dg/vect/slp-43.c: New testcase.

2016-06-08  Richard Biener  <rguenther@suse.de>

        PR tree-optimization/71452
        * gcc.dg/torture/pr71452.c: New testcase.
        ...
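The heart of the change is how vectorizable_load now picks a load granularity for strided SLP groups instead of rejecting awkward group sizes: sub-vector loads when the group size evenly divides the vector, one whole-vector load when the vector size evenly divides the group, and element-wise scalar loads otherwise. The following standalone sketch models only that arithmetic; choose_chunking, the plain-int parameters and the 4-element vector are illustrative assumptions, not GCC code.

#include <stdio.h>

/* Hypothetical stand-in for the chunking decision added to
   vectorizable_load: given the SLP group size and the number of vector
   elements (nunits), pick how many loads feed one vector statement
   (nloads) and how many group elements each load covers (lnel).  */
static void
choose_chunking (int group_size, int nunits)
{
  int nloads = nunits;           /* default: one scalar load per element */
  int lnel = 1;
  const char *kind = "scalar elements";

  if (group_size < nunits && nunits % group_size == 0)
    {
      /* Group divides the vector: load sub-vectors and concatenate.  */
      nloads = nunits / group_size;
      lnel = group_size;
      kind = "sub-vectors of group_size elements";
    }
  else if (group_size >= nunits && group_size % nunits == 0)
    {
      /* Vector divides the group: one whole-vector load.  */
      nloads = 1;
      lnel = nunits;
      kind = "a full vector";
    }
  /* Otherwise fall back to element-wise loads; before the patch this
     case was rejected as "unhandled strided group load".  */

  printf ("group_size=%2d nunits=%d -> %2d load(s) x %d element(s), %s\n",
          group_size, nunits, nloads, lnel, kind);
}

int
main (void)
{
  /* The group sizes exercised by the new testcase, against an assumed
     4-element vector (e.g. four ints in a 128-bit vector).  */
  int groups[] = { 1, 2, 3, 4, 6, 8, 16 };
  for (unsigned i = 0; i < sizeof groups / sizeof groups[0]; ++i)
    choose_chunking (groups[i], 4);
  return 0;
}

For group sizes 3 and 6 neither condition holds, so the loads stay scalar; those are exactly the cases the removed check used to reject.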
gcc/testsuite/gcc.dg/vect/slp-43.c (new file)

/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-O3" } */

#include <string.h>
#include "tree-vect.h"

#define FOO(T,N) \
void __attribute__((noinline,noclone)) \
foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
{ \
  T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
  T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
  for (int i = 0; i < 16; i++) \
    { \
      for (int j = 0; j < N; ++j) \
        out[j] = in[j]; \
      in += s*N; \
      out += N; \
    } \
}

#define TEST(T,N) \
  do { \
    memset (out, 0, 4096); \
    foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
    if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
      __builtin_abort (); \
    for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
      if (out[i] != 0) \
        __builtin_abort (); \
  } while (0)

FOO(char, 1)
FOO(char, 2)
FOO(char, 3)
FOO(char, 4)
FOO(char, 6)
FOO(char, 8)
FOO(int, 1)
FOO(int, 2)
FOO(int, 3)
FOO(int, 4)
FOO(int, 6)
FOO(int, 8)
FOO(int, 16)

char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));

int main()
{
  check_vect ();

  for (int i = 0; i < 4096; ++i)
    {
      in[i] = i;
      __asm__ volatile ("" : : : "memory");
    }

  TEST(char, 1);
  TEST(char, 2);
  TEST(char, 3);
  TEST(char, 4);
  TEST(char, 6);
  TEST(char, 8);
  TEST(int, 1);
  TEST(int, 2);
  TEST(int, 3);
  TEST(int, 4);
  TEST(int, 6);
  TEST(int, 8);
  TEST(int, 16);

  return 0;
}

/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
gcc/tree-vect-stmts.c

@@ -6440,17 +6440,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
         }
     }
   else if (STMT_VINFO_STRIDED_P (stmt_info))
-    {
-      if (grouped_load
-          && slp
-          && (group_size > nunits
-              || nunits % group_size != 0))
-        {
-          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                           "unhandled strided group load\n");
-          return false;
-        }
-    }
+    ;
   else
     {
       negative = tree_int_cst_compare (nested_in_vect_loop

@@ -6744,16 +6734,29 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       running_off = offvar;
       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
       int nloads = nunits;
+      int lnel = 1;
       tree ltype = TREE_TYPE (vectype);
       auto_vec<tree> dr_chain;
       if (slp)
         {
-          nloads = nunits / group_size;
-          if (group_size < nunits)
-            ltype = build_vector_type (TREE_TYPE (vectype), group_size);
-          else
-            ltype = vectype;
-          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+          if (group_size < nunits
+              && nunits % group_size == 0)
+            {
+              nloads = nunits / group_size;
+              lnel = group_size;
+              ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+              ltype = build_aligned_type (ltype,
+                                          TYPE_ALIGN (TREE_TYPE (vectype)));
+            }
+          else if (group_size >= nunits
+                   && group_size % nunits == 0)
+            {
+              nloads = 1;
+              lnel = nunits;
+              ltype = vectype;
+              ltype = build_aligned_type (ltype,
+                                          TYPE_ALIGN (TREE_TYPE (vectype)));
+            }
           /* For SLP permutation support we need to load the whole group,
              not only the number of vector stmts the permutation result
              fits in.  */

@@ -6765,48 +6768,43 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
           else
             ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
         }
+      int group_el = 0;
+      unsigned HOST_WIDE_INT
+        elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
       for (j = 0; j < ncopies; j++)
         {
-          tree vec_inv;
-
           if (nloads > 1)
-            {
-              vec_alloc (v, nloads);
-              for (i = 0; i < nloads; i++)
-                {
-                  tree newref, newoff;
-                  gimple *incr;
-                  newref = build2 (MEM_REF, ltype, running_off, alias_off);
-                  newref = force_gimple_operand_gsi (gsi, newref, true,
-                                                     NULL_TREE, true,
-                                                     GSI_SAME_STMT);
-                  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
-                  newoff = copy_ssa_name (running_off);
-                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                              running_off, stride_step);
-                  vect_finish_stmt_generation (stmt, incr, gsi);
-                  running_off = newoff;
-                }
-
-              vec_inv = build_constructor (vectype, v);
-              new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
-              new_stmt = SSA_NAME_DEF_STMT (new_temp);
-            }
-          else
+            vec_alloc (v, nloads);
+          for (i = 0; i < nloads; i++)
             {
-              new_stmt = gimple_build_assign (make_ssa_name (ltype),
-                                              build2 (MEM_REF, ltype,
-                                                      running_off, alias_off));
+              tree this_off = build_int_cst (TREE_TYPE (alias_off),
+                                             group_el * elsz);
+              new_stmt = gimple_build_assign (make_ssa_name (ltype),
+                                              build2 (MEM_REF, ltype,
+                                                      running_off, this_off));
               vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-              tree newoff = copy_ssa_name (running_off);
-              gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                                  running_off, stride_step);
-              vect_finish_stmt_generation (stmt, incr, gsi);
-              running_off = newoff;
+              if (nloads > 1)
+                CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                        gimple_assign_lhs (new_stmt));
+              group_el += lnel;
+              if (! slp
+                  || group_el == group_size)
+                {
+                  tree newoff = copy_ssa_name (running_off);
+                  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                                      running_off, stride_step);
+                  vect_finish_stmt_generation (stmt, incr, gsi);
+                  running_off = newoff;
+                  group_el = 0;
+                }
             }
-
+          if (nloads > 1)
+            {
+              tree vec_inv = build_constructor (vectype, v);
+              new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+              new_stmt = SSA_NAME_DEF_STMT (new_temp);
+            }
           if (slp)
...
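On the code-generation side, the rewritten emission loop addresses each group member at the constant offset group_el * elsz from a running pointer and only advances that pointer by stride_step once a full group has been consumed (or after every load in the non-SLP case). Below is a small hedged simulation of that control flow; the concrete numbers (a group of 3 ints, 4-element vectors, three vector statements) are assumed purely for illustration.

#include <stdio.h>

int
main (void)
{
  /* Assumed example: group_size 3 with 4-element vectors takes the
     scalar fallback, i.e. nloads = nunits and lnel = 1.  */
  int group_size = 3, nunits = 4, elsz = 4;   /* elsz: sizeof (int) */
  int nloads = nunits, lnel = 1;
  int ncopies = 3;                            /* number of vector stmts, assumed */
  int slp = 1;

  int group_el = 0;
  for (int j = 0; j < ncopies; j++)
    for (int i = 0; i < nloads; i++)
      {
        /* Mirrors the MEM_REF built with this_off in the patch.  */
        printf ("vector stmt %d, load %d: MEM[running_off + %2d]\n",
                j, i, group_el * elsz);
        group_el += lnel;
        if (!slp || group_el == group_size)
          {
            /* Mirrors the POINTER_PLUS_EXPR that bumps running_off.  */
            printf ("  running_off += stride_step\n");
            group_el = 0;
          }
      }
  return 0;
}

Each MEM line stands for one scalar (or sub-vector) load; when nloads > 1 the real code collects their results in a CONSTRUCTOR and builds the vector with vect_init_vector, as in the hunk above.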