Commit 9b999e8c by Richard Biener Committed by Richard Biener

tree-vect-stmts.c (vectorizable_load): Compute the pointer adjustment for gaps…

tree-vect-stmts.c (vectorizable_load): Compute the pointer adjustment for gaps at the end of a SLP load group properly.

2015-06-08  Richard Biener  <rguenther@suse.de>

	* tree-vect-stmts.c (vectorizable_load): Compute the pointer
	adjustment for gaps at the end of a SLP load group properly.
	* tree-vect-slp.c (vect_supported_load_permutation_p): Allow
	all permutations we can generate.
	(vect_transform_slp_perm_load): Use the correct group-size.

	* gcc.dg/vect/slp-perm-10.c: New testcase.
	* gcc.dg/vect/slp-23.c: Adjust.
	* gcc.dg/torture/pr53366-2.c: Also verify cross-iteration
	vector pointer update.

From-SVN: r224221
parent 7026b8df
2015-06-08 Richard Biener <rguenther@suse.de>
* tree-vect-stmts.c (vectorizable_load): Compute the pointer
adjustment for gaps at the end of a SLP load group properly.
* tree-vect-slp.c (vect_supported_load_permutation_p): Allow
all permutations we can generate.
(vect_transform_slp_perm_load): Use the correct group-size.
2015-06-08 Marc Glisse <marc.glisse@inria.fr> 2015-06-08 Marc Glisse <marc.glisse@inria.fr>
* genmatch.c (expr::gen_transform): For conditions, guess the type * genmatch.c (expr::gen_transform): For conditions, guess the type
......
2015-06-08 Richard Biener <rguenther@suse.de>
* gcc.dg/vect/slp-perm-10.c: New testcase.
* gcc.dg/vect/slp-23.c: Adjust.
* gcc.dg/torture/pr53366-2.c: Also verify cross-iteration
vector pointer update.
2015-06-08 Marek Polacek <polacek@redhat.com> 2015-06-08 Marek Polacek <polacek@redhat.com>
PR sanitizer/66452 PR sanitizer/66452
......
...@@ -4,18 +4,18 @@ ...@@ -4,18 +4,18 @@
extern void abort (void); extern void abort (void);
struct T { float r[3], i[3]; }; struct T { float r[3], i[3]; };
struct U { struct T j[2]; }; struct U { struct T j[4]; };
void __attribute__ ((noinline)) void __attribute__ ((noinline))
foo (struct U *__restrict y, const float _Complex *__restrict x) foo (struct U *__restrict y, const float _Complex *__restrict x)
{ {
int i, j; int i, j;
for (j = 0; j < 2; ++j) for (j = 0; j < 4; ++j)
{ {
float a = __real__ x[j]; float a = __real__ x[j];
float b = __imag__ x[j]; float b = __imag__ x[j];
float c = __real__ x[j + 2]; float c = __real__ x[j + 4];
float d = __imag__ x[j + 2]; float d = __imag__ x[j + 4];
for (i = 0; i < 3; ++i) for (i = 0; i < 3; ++i)
{ {
y->j[j].r[i] = y->j[j].r[i] + a + c; y->j[j].r[i] = y->j[j].r[i] + a + c;
...@@ -24,20 +24,23 @@ foo (struct U *__restrict y, const float _Complex *__restrict x) ...@@ -24,20 +24,23 @@ foo (struct U *__restrict y, const float _Complex *__restrict x)
} }
} }
_Complex float x[4]; _Complex float x[8];
struct U y; struct U y;
int int
main () main ()
{ {
int i, j; int i, j;
for (i = 0; i < 4; ++i) for (i = 0; i < 8; ++i)
x[i] = i + 1.0iF * (2 * i); {
x[i] = i + 1.0iF * (2 * i);
__asm__ volatile ("");
}
foo (&y, x); foo (&y, x);
for (j = 0; j < 2; ++j) for (j = 0; j < 4; ++j)
for (i = 0; i < 3; ++i) for (i = 0; i < 3; ++i)
if (y.j[j].r[i] != __real__ (x[j] + x[j + 2]) if (y.j[j].r[i] != __real__ (x[j] + x[j + 4])
|| y.j[j].i[i] != __imag__ (x[j] + x[j + 2])) || y.j[j].i[i] != __imag__ (x[j] + x[j + 4]))
__builtin_abort (); __builtin_abort ();
return 0; return 0;
} }
...@@ -108,5 +108,6 @@ int main (void) ...@@ -108,5 +108,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
int a[256], b[256];
/* Copy A to B in groups of eight ints, permuting within each group:
   output lanes 0..7 of a group take input lanes 0, 0, 3, 3, 4, 6, 4, 6.
   Input lanes 1, 2, 5 and 7 of every group are never read, so each
   load group has gaps, including one at its end.  The noinline
   attribute keeps the loop in this function instead of letting it be
   inlined into the caller.  */
void __attribute__((noinline))
foo (void)
{
int i;
/* 32 groups of 8 elements cover all 256 entries of both arrays.  */
for (i = 0; i < 32; ++i)
{
b[i*8+0] = a[i*8+0];
b[i*8+1] = a[i*8+0];
b[i*8+2] = a[i*8+3];
b[i*8+3] = a[i*8+3];
b[i*8+4] = a[i*8+4];
b[i*8+5] = a[i*8+6];
b[i*8+6] = a[i*8+4];
b[i*8+7] = a[i*8+6];
}
}
/* Test driver: fill A with its own indices, run foo, and abort unless
   every group of eight in B holds the permuted values foo is expected
   to store.  Returns 0 when all checks pass.  */
int main ()
{
int i;
/* Declared in tree-vect.h (not visible here); presumably checks that
   the host can execute the vectorized code -- TODO confirm.  */
check_vect ();
for (i = 0; i < 256; ++i)
{
a[i] = i;
/* Empty volatile asm in the loop body; presumably there to keep the
   compiler from vectorizing or folding this initialization loop so
   only the loop in foo is counted -- TODO confirm.  */
__asm__ volatile ("");
}
foo ();
/* Because a[k] == k, each expected value is just the index of the
   element of A that foo copied into that slot of B.  */
for (i = 0; i < 32; ++i)
if (b[i*8+0] != i*8+0
|| b[i*8+1] != i*8+0
|| b[i*8+2] != i*8+3
|| b[i*8+3] != i*8+3
|| b[i*8+4] != i*8+4
|| b[i*8+5] != i*8+6
|| b[i*8+6] != i*8+4
|| b[i*8+7] != i*8+6)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
...@@ -1497,47 +1497,14 @@ vect_supported_load_permutation_p (slp_instance slp_instn) ...@@ -1497,47 +1497,14 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
return true; return true;
} }
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to /* For loop vectorization verify we can generate the permutation. */
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
well (unless it's reduction). */
if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
return false;
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (!node->load_permutation.exists ())
return false;
load_index = sbitmap_alloc (group_size);
bitmap_clear (load_index);
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{
unsigned int lidx = node->load_permutation[0];
if (bitmap_bit_p (load_index, lidx))
{
sbitmap_free (load_index);
return false;
}
bitmap_set_bit (load_index, lidx);
FOR_EACH_VEC_ELT (node->load_permutation, j, k)
if (k != lidx)
{
sbitmap_free (load_index);
return false;
}
}
for (i = 0; i < group_size; i++)
if (!bitmap_bit_p (load_index, i))
{
sbitmap_free (load_index);
return false;
}
sbitmap_free (load_index);
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists () if (node->load_permutation.exists ()
&& !vect_transform_slp_perm_load && !vect_transform_slp_perm_load
(node, vNULL, NULL, (node, vNULL, NULL,
SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true)) SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
return false; return false;
return true; return true;
} }
...@@ -3282,6 +3249,8 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3282,6 +3249,8 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
return false; return false;
stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
/* Generate permutation masks for every NODE. Number of masks for each NODE /* Generate permutation masks for every NODE. Number of masks for each NODE
is equal to GROUP_SIZE. is equal to GROUP_SIZE.
E.g., we have a group of three nodes with three loads from the same E.g., we have a group of three nodes with three loads from the same
...@@ -3316,7 +3285,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3316,7 +3285,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
for (k = 0; k < group_size; k++) for (k = 0; k < group_size; k++)
{ {
i = SLP_TREE_LOAD_PERMUTATION (node)[k]; i = SLP_TREE_LOAD_PERMUTATION (node)[k];
first_mask_element = i + j * group_size; first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
if (!vect_get_mask_element (stmt, first_mask_element, 0, if (!vect_get_mask_element (stmt, first_mask_element, 0,
nunits, only_one_vec, index, nunits, only_one_vec, index,
mask, &current_mask_element, mask, &current_mask_element,
......
...@@ -5802,7 +5802,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -5802,7 +5802,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
gimple ptr_incr = NULL; gimple ptr_incr = NULL;
int nunits = TYPE_VECTOR_SUBPARTS (vectype); int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies; int ncopies;
int i, j, group_size = -1, group_gap; int i, j, group_size = -1, group_gap_adj;
tree msq = NULL_TREE, lsq; tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE; tree offset = NULL_TREE;
tree byte_offset = NULL_TREE; tree byte_offset = NULL_TREE;
...@@ -6391,26 +6391,24 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -6391,26 +6391,24 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
} }
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
group_gap_adj = 0;
/* VEC_NUM is the number of vect stmts to be created for this group. */ /* VEC_NUM is the number of vect stmts to be created for this group. */
if (slp) if (slp)
{ {
grouped_load = false; grouped_load = false;
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt)); group_gap_adj = vf * group_size - nunits * vec_num;
} }
else else
{ vec_num = group_size;
vec_num = group_size;
group_gap = 0;
}
} }
else else
{ {
first_stmt = stmt; first_stmt = stmt;
first_dr = dr; first_dr = dr;
group_size = vec_num = 1; group_size = vec_num = 1;
group_gap = 0; group_gap_adj = 0;
} }
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
...@@ -6826,12 +6824,15 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -6826,12 +6824,15 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if (slp && !slp_perm) if (slp && !slp_perm)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
} }
/* Bump the vector pointer to account for a gap. */ /* Bump the vector pointer to account for a gap or for excess
if (slp && group_gap != 0) elements loaded for a permuted SLP load. */
if (group_gap_adj != 0)
{ {
tree bump = size_binop (MULT_EXPR, bool ovf;
TYPE_SIZE_UNIT (elem_type), tree bump
size_int (group_gap)); = wide_int_to_tree (sizetype,
wi::smul (TYPE_SIZE_UNIT (elem_type),
group_gap_adj, &ovf));
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump); stmt, bump);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment