Commit cb330ba5 by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/78899 (Vectorized loop with optimized mask stores motion…

re PR tree-optimization/78899 (Vectorized loop with optimized mask stores motion is completely deleted after r242520.)

	PR tree-optimization/78899
	* tree-if-conv.c (version_loop_for_if_conversion): Instead of
	returning bool return struct loop *, NULL for failure and the new
	loop on success.
	(versionable_outer_loop_p): Don't version outer loop if it has
	dont_vectorized bit set.
	(tree_if_conversion): When versioning outer loop, ensure
	tree_if_conversion is performed also on the inner loop of the
	non-vectorizable outer loop copy.
	* tree-vectorizer.c (set_uid_loop_bbs): Formatting fix.  Fold
	LOOP_VECTORIZED in inner loop of the scalar outer loop and
	prevent vectorization of it.
	(vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure
	the outer loop vectorization of the non-scalar version is attempted
	before vectorization of the inner loop in scalar version.  If
	outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
	vectorization of its inner loop.
	* tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
	has 2 inner loops, rename also on edges from bb whose single pred
	is outer_loop->header.  Fix typo in function comment.

	* gcc.target/i386/pr78899.c: New test.
	* gcc.dg/pr71077.c: New test.

From-SVN: r244238
parent 47d5beb4
2017-01-09 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78899
* tree-if-conv.c (version_loop_for_if_conversion): Instead of
returning bool return struct loop *, NULL for failure and the new
loop on success.
(versionable_outer_loop_p): Don't version outer loop if it has
dont_vectorized bit set.
(tree_if_conversion): When versioning outer loop, ensure
tree_if_conversion is performed also on the inner loop of the
non-vectorizable outer loop copy.
* tree-vectorizer.c (set_uid_loop_bbs): Formatting fix. Fold
LOOP_VECTORIZED in inner loop of the scalar outer loop and
prevent vectorization of it.
(vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure
the outer loop vectorization of the non-scalar version is attempted
before vectorization of the inner loop in scalar version. If
outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
vectorization of its inner loop.
* tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
has 2 inner loops, rename also on edges from bb whose single pred
is outer_loop->header. Fix typo in function comment.
2017-01-09 Martin Sebor <msebor@redhat.com>
PR bootstrap/79033
......
2017-01-09 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78899
* gcc.target/i386/pr78899.c: New test.
* gcc.dg/pr71077.c: New test.
2017-01-09 Martin Jambor <mjambor@suse.cz>
PR ipa/78365
......
/* PR c++/71077 */
/* { dg-do compile } */
/* { dg-options "-O3" } */
/* { dg-additional-options "-mavx2" { target { i?86-*-* x86_64-*-* } } } */
/* Regression testcase for PR c++/71077: the outer/inner loop nest below used
   to trigger a problem in outer-loop vectorization at -O3 with AVX2.  The
   exact loop shape matters — do not restructure the loops or the guard.  */
void
foo (int *a, int n)
{
int b, c;
for (b = 0; b < n; b++)
for (c = 0; c < 32; c++)
/* Conditional store guarded by a bit test: this creates the masked-store
   pattern the vectorizer must handle when versioning the outer loop.  */
if ((b & 1U) << c)
a[b + c] = 0;
}
/* PR tree-optimization/78899 */
/* { dg-do compile } */
/* { dg-options "-Ofast -fopenmp-simd -mavx2 -mno-avx512f" } */
#define N 1024
#define M 4
int p1[N], p2[N], p3[N], c[N];
/* Regression testcase for PR tree-optimization/78899: after r242520 the
   vectorized loop with optimized mask-store motion was deleted entirely.
   The dg-final scan-assembler checks (vpmaskmov / vpadd) depend on this
   exact loop structure, so the code must not be reshaped.  */
void
foo (int n)
{
int i, k;
/* Outer loop over blocks of M elements; the inner simd loop is the one
   expected to be vectorized with AVX2 masked loads/stores.  */
for (k = 0; k < n / M; k++)
{
#pragma omp simd
for (i = 0; i < M; i++)
/* Predicated updates: these conditional stores become masked stores,
   which the mask-store motion optimization hoists/sinks.  */
if (c[k * M + i])
{
p1[k * M + i] += 1;
p2[k * M + i] = p3[k * M + i] + 2;
}
}
}
/* Ensure the loop is vectorized. */
/* { dg-final { scan-assembler "vpmaskmov" } } */
/* { dg-final { scan-assembler "vpadd" } } */
...@@ -2535,7 +2535,7 @@ combine_blocks (struct loop *loop) ...@@ -2535,7 +2535,7 @@ combine_blocks (struct loop *loop)
loop to execute. The vectorizer pass will fold this loop to execute. The vectorizer pass will fold this
internal call into either true or false. */ internal call into either true or false. */
static bool static struct loop *
version_loop_for_if_conversion (struct loop *loop) version_loop_for_if_conversion (struct loop *loop)
{ {
basic_block cond_bb; basic_block cond_bb;
...@@ -2566,7 +2566,7 @@ version_loop_for_if_conversion (struct loop *loop) ...@@ -2566,7 +2566,7 @@ version_loop_for_if_conversion (struct loop *loop)
ifc_bbs[i]->aux = saved_preds[i]; ifc_bbs[i]->aux = saved_preds[i];
if (new_loop == NULL) if (new_loop == NULL)
return false; return NULL;
new_loop->dont_vectorize = true; new_loop->dont_vectorize = true;
new_loop->force_vectorize = false; new_loop->force_vectorize = false;
...@@ -2574,7 +2574,7 @@ version_loop_for_if_conversion (struct loop *loop) ...@@ -2574,7 +2574,7 @@ version_loop_for_if_conversion (struct loop *loop)
gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num)); gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
gsi_insert_before (&gsi, g, GSI_SAME_STMT); gsi_insert_before (&gsi, g, GSI_SAME_STMT);
update_ssa (TODO_update_ssa); update_ssa (TODO_update_ssa);
return true; return new_loop;
} }
/* Return true when LOOP satisfies the follow conditions that will /* Return true when LOOP satisfies the follow conditions that will
...@@ -2594,6 +2594,7 @@ static bool ...@@ -2594,6 +2594,7 @@ static bool
versionable_outer_loop_p (struct loop *loop) versionable_outer_loop_p (struct loop *loop)
{ {
if (!loop_outer (loop) if (!loop_outer (loop)
|| loop->dont_vectorize
|| !loop->inner || !loop->inner
|| loop->inner->next || loop->inner->next
|| !single_exit (loop) || !single_exit (loop)
...@@ -2602,7 +2603,7 @@ versionable_outer_loop_p (struct loop *loop) ...@@ -2602,7 +2603,7 @@ versionable_outer_loop_p (struct loop *loop)
|| !single_pred_p (loop->latch) || !single_pred_p (loop->latch)
|| !single_pred_p (loop->inner->latch)) || !single_pred_p (loop->inner->latch))
return false; return false;
basic_block outer_exit = single_pred (loop->latch); basic_block outer_exit = single_pred (loop->latch);
basic_block inner_exit = single_pred (loop->inner->latch); basic_block inner_exit = single_pred (loop->inner->latch);
...@@ -2789,7 +2790,10 @@ tree_if_conversion (struct loop *loop) ...@@ -2789,7 +2790,10 @@ tree_if_conversion (struct loop *loop)
{ {
unsigned int todo = 0; unsigned int todo = 0;
bool aggressive_if_conv; bool aggressive_if_conv;
struct loop *rloop;
again:
rloop = NULL;
ifc_bbs = NULL; ifc_bbs = NULL;
any_pred_load_store = false; any_pred_load_store = false;
any_complicated_phi = false; any_complicated_phi = false;
...@@ -2829,8 +2833,31 @@ tree_if_conversion (struct loop *loop) ...@@ -2829,8 +2833,31 @@ tree_if_conversion (struct loop *loop)
struct loop *vloop struct loop *vloop
= (versionable_outer_loop_p (loop_outer (loop)) = (versionable_outer_loop_p (loop_outer (loop))
? loop_outer (loop) : loop); ? loop_outer (loop) : loop);
if (!version_loop_for_if_conversion (vloop)) struct loop *nloop = version_loop_for_if_conversion (vloop);
if (nloop == NULL)
goto cleanup; goto cleanup;
if (vloop != loop)
{
/* If versionable_outer_loop_p decided to version the
outer loop, version also the inner loop of the non-vectorized
loop copy. So we transform:
loop1
loop2
into:
if (LOOP_VECTORIZED (1, 3))
{
loop1
loop2
}
else
loop3 (copy of loop1)
if (LOOP_VECTORIZED (4, 5))
loop4 (copy of loop2)
else
loop5 (copy of loop4) */
gcc_assert (nloop->inner && nloop->inner->next == NULL);
rloop = nloop->inner;
}
} }
/* Now all statements are if-convertible. Combine all the basic /* Now all statements are if-convertible. Combine all the basic
...@@ -2854,6 +2881,11 @@ tree_if_conversion (struct loop *loop) ...@@ -2854,6 +2881,11 @@ tree_if_conversion (struct loop *loop)
free (ifc_bbs); free (ifc_bbs);
ifc_bbs = NULL; ifc_bbs = NULL;
} }
if (rloop != NULL)
{
loop = rloop;
goto again;
}
return todo; return todo;
} }
......
...@@ -71,7 +71,7 @@ rename_use_op (use_operand_p op_p) ...@@ -71,7 +71,7 @@ rename_use_op (use_operand_p op_p)
} }
/* Renames the variables in basic block BB. Allow renaming of PHI argumnets /* Renames the variables in basic block BB. Allow renaming of PHI arguments
on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
true. */ true. */
...@@ -102,9 +102,25 @@ rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop) ...@@ -102,9 +102,25 @@ rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
FOR_EACH_EDGE (e, ei, bb->preds) FOR_EACH_EDGE (e, ei, bb->preds)
{ {
if (!flow_bb_inside_loop_p (loop, e->src) if (!flow_bb_inside_loop_p (loop, e->src))
&& (!rename_from_outer_loop || e->src != outer_loop->header)) {
continue; if (!rename_from_outer_loop)
continue;
if (e->src != outer_loop->header)
{
if (outer_loop->inner->next)
{
/* If outer_loop has 2 inner loops, allow there to
be an extra basic block which decides which of the
two loops to use using LOOP_VECTORIZED. */
if (!single_pred_p (e->src)
|| single_pred (e->src) != outer_loop->header)
continue;
}
else
continue;
}
}
for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
gsi_next (&gsi)) gsi_next (&gsi))
rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e)); rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
......
...@@ -465,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value) ...@@ -465,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value)
update_stmt (use_stmt); update_stmt (use_stmt);
} }
} }
/* Set the uids of all the statements in basic blocks inside loop /* Set the uids of all the statements in basic blocks inside loop
represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
call guarding the loop which has been if converted. */ call guarding the loop which has been if converted. */
...@@ -477,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) ...@@ -477,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop; LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
gcc_checking_assert (vect_loop_vectorized_call gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
(LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
== loop_vectorized_call); == loop_vectorized_call);
/* If we are going to vectorize outer loop, prevent vectorization
of the inner loop in the scalar loop - either the scalar loop is
thrown away, so it is a wasted work, or is used only for
a few iterations. */
if (scalar_loop->inner)
{
gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
if (g)
{
arg = gimple_call_arg (g, 0);
get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
fold_loop_vectorized_call (g, boolean_false_node);
}
}
bbs = get_loop_body (scalar_loop); bbs = get_loop_body (scalar_loop);
for (i = 0; i < scalar_loop->num_nodes; i++) for (i = 0; i < scalar_loop->num_nodes; i++)
{ {
...@@ -534,14 +548,59 @@ vectorize_loops (void) ...@@ -534,14 +548,59 @@ vectorize_loops (void)
only over initial loops skipping newly generated ones. */ only over initial loops skipping newly generated ones. */
FOR_EACH_LOOP (loop, 0) FOR_EACH_LOOP (loop, 0)
if (loop->dont_vectorize) if (loop->dont_vectorize)
any_ifcvt_loops = true;
else if ((flag_tree_loop_vectorize
&& optimize_loop_nest_for_speed_p (loop))
|| loop->force_vectorize)
{ {
loop_vec_info loop_vinfo, orig_loop_vinfo = NULL; any_ifcvt_loops = true;
gimple *loop_vectorized_call = vect_loop_vectorized_call (loop); /* If-conversion sometimes versions both the outer loop
vectorize_epilogue: (for the case when outer loop vectorization might be
desirable) as well as the inner loop in the scalar version
of the loop. So we have:
if (LOOP_VECTORIZED (1, 3))
{
loop1
loop2
}
else
loop3 (copy of loop1)
if (LOOP_VECTORIZED (4, 5))
loop4 (copy of loop2)
else
loop5 (copy of loop4)
If FOR_EACH_LOOP gives us loop3 first (which has
dont_vectorize set), make sure to process loop1 before loop4;
so that we can prevent vectorization of loop4 if loop1
is successfully vectorized. */
if (loop->inner)
{
gimple *loop_vectorized_call
= vect_loop_vectorized_call (loop);
if (loop_vectorized_call
&& vect_loop_vectorized_call (loop->inner))
{
tree arg = gimple_call_arg (loop_vectorized_call, 0);
struct loop *vector_loop
= get_loop (cfun, tree_to_shwi (arg));
if (vector_loop && vector_loop != loop)
{
loop = vector_loop;
/* Make sure we don't vectorize it twice. */
loop->dont_vectorize = true;
goto try_vectorize;
}
}
}
}
else
{
loop_vec_info loop_vinfo, orig_loop_vinfo;
gimple *loop_vectorized_call;
try_vectorize:
if (!((flag_tree_loop_vectorize
&& optimize_loop_nest_for_speed_p (loop))
|| loop->force_vectorize))
continue;
orig_loop_vinfo = NULL;
loop_vectorized_call = vect_loop_vectorized_call (loop);
vectorize_epilogue:
vect_location = find_loop_location (loop); vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
&& dump_enabled_p ()) && dump_enabled_p ())
...@@ -595,6 +654,12 @@ vectorize_epilogue: ...@@ -595,6 +654,12 @@ vectorize_epilogue:
ret |= TODO_cleanup_cfg; ret |= TODO_cleanup_cfg;
} }
} }
/* If outer loop vectorization fails for LOOP_VECTORIZED guarded
loop, don't vectorize its inner loop; we'll attempt to
vectorize LOOP_VECTORIZED guarded inner loop of the scalar
loop version. */
if (loop_vectorized_call && loop->inner)
loop->inner->dont_vectorize = true;
continue; continue;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment