Commit dc236397 by Michael Matz Committed by Michael Matz

re PR tree-optimization/83323 (186.crafty miscompares)

Fix PR83323

	* gimple-loop-jam (unroll_jam_possible_p): Correct test for
	head-controlled loops and loop BBs.
	* common.opt (funroll-and-jam): Remove, instead ...
	(floop-unroll-and-jam): ... reuse this option.
	* opts.c (default_options_table): Use OPT_floop_unroll_and_jam.
	* doc/invoke.texi (-funroll-and-jam): Move docu to ...
	(-floop-unroll-and-jam): ... this option.

testsuite/
	* gcc.dg/pr83323.c: New test.
	* gcc.dg/unroll-and-jam.c: Use -floop-unroll-and-jam.

From-SVN: r255514
parent 450aa0ee
2017-12-08 Michael Matz <matz@suse.de>
Fix PR tree-optimization/83323
* gimple-loop-jam (unroll_jam_possible_p): Correct test for
head-controlled loops and loop BBs.
* common.opt (funroll-and-jam): Remove, instead ...
(floop-unroll-and-jam): ... reuse this option.
* opts.c (default_options_table): Use OPT_floop_unroll_and_jam.
* doc/invoke.texi (-funroll-and-jam): Move docu to ...
(-floop-unroll-and-jam): ... this option.
2017-12-08 Jakub Jelinek <jakub@redhat.com>
* ipa-polymorphic-call.c (noncall_stmt_may_be_vtbl_ptr_store): Fix
......@@ -1512,8 +1512,8 @@ Common Alias(floop-nest-optimize)
Enable loop nest transforms. Same as -floop-nest-optimize.
floop-unroll-and-jam
Common Alias(floop-nest-optimize)
Enable loop nest transforms. Same as -floop-nest-optimize.
Common Report Var(flag_unroll_jam) Optimization
Perform unroll-and-jam on loops.
fgnu-tm
Common Report Var(flag_tm)
......@@ -2695,10 +2695,6 @@ fsplit-loops
Common Report Var(flag_split_loops) Optimization
Perform loop splitting.
funroll-and-jam
Common Report Var(flag_unroll_jam) Optimization
Perform unroll-and-jam on loops.
funwind-tables
Common Report Var(flag_unwind_tables) Optimization
Just generate unwind tables for exception handling.
......
......@@ -437,7 +437,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
-ftree-switch-conversion -ftree-tail-merge @gol
-ftree-ter -ftree-vectorize -ftree-vrp -funconstrained-commons @gol
-funit-at-a-time -funroll-all-loops -funroll-loops -funroll-and-jam @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-math-optimizations -funswitch-loops @gol
-fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol
-fweb -fwhole-program -fwpa -fuse-linker-plugin @gol
......@@ -8511,11 +8511,9 @@ at @option{-O} and higher.
@item -ftree-loop-linear
@itemx -floop-strip-mine
@itemx -floop-block
@itemx -floop-unroll-and-jam
@opindex ftree-loop-linear
@opindex floop-strip-mine
@opindex floop-block
@opindex floop-unroll-and-jam
Perform loop nest optimizations. Same as
@option{-floop-nest-optimize}. To use this code transformation, GCC has
to be configured with @option{--with-isl} to enable the Graphite loop
......@@ -9789,8 +9787,8 @@ for one side of the iteration space and false for the other.
Move branches with loop invariant conditions out of the loop, with duplicates
of the loop on both branches (modified according to result of the condition).
@item -funroll-and-jam
@opindex funroll-and-jam
@item -floop-unroll-and-jam
@opindex floop-unroll-and-jam
Apply unroll and jam transoformations on feasible loops. In a loop
nest this unrolls the outer loop by some factor and fuses the resulting
multiple inner loops.
......@@ -152,7 +152,7 @@ merge_loop_tree (struct loop *loop, struct loop *old)
free (bbs);
}
/* BB exits the outer loop of an unroll-and-jam situation.
/* BB is part of the outer loop of an unroll-and-jam situation.
Check if any statements therein would prevent the transformation. */
static bool
......@@ -160,9 +160,10 @@ bb_prevents_fusion_p (basic_block bb)
{
gimple_stmt_iterator gsi;
/* BB is duplicated by outer unrolling and then all N-1 first copies
move into the body of the fused inner loop. The last copy remains
the exit block of the outer loop and is still outside the inner loop
also after fusion. We can't allow this for some effects of BB:
move into the body of the fused inner loop. If BB exits the outer loop
the last copy still doess so, and the first N-1 copies are cancelled
by loop unrolling, so also after fusion it's the exit block.
But there might be other reasons that prevent fusion:
* stores or unknown side-effects prevent fusion
* loads don't
* computations into SSA names: these aren't problematic. Their
......@@ -204,6 +205,19 @@ unroll_jam_possible_p (struct loop *outer, struct loop *loop)
if (outer->inner != loop || loop->next)
return false;
/* Prevent head-controlled inner loops, that we usually have.
The guard block would need to be accepted
(invariant condition either entering or skipping the loop),
without also accepting arbitrary control flow. When unswitching
ran before us (as with -O3) this won't be a problem because its
outer loop unswitching will have moved out the invariant condition.
If we do that we need to extend fuse_loops() to cope with this
by threading through the (still invariant) copied condition
between the two loop copies. */
if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
return false;
/* The number of iterations of the inner loop must be loop invariant
with respect to the outer loop. */
if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
......@@ -218,23 +232,8 @@ unroll_jam_possible_p (struct loop *outer, struct loop *loop)
n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
for (i = 0; i < n; i++)
{
if (bbs[i]->loop_father == outer
&& bbs[i] != outer->latch && bbs[i] != outer->header
&& (!loop_exits_from_bb_p (outer, bbs[i])
|| bb_prevents_fusion_p (bbs[i])))
break;
/* XXX Note that the above disallows head-controlled inner loops,
that we usually have. The guard block would need to be accepted
(invariant condition either entering or skipping the loop),
without also accepting arbitrary control flow. When unswitching
ran before us (as with -O3) this won't be a problem because its
outer loop unswitching will have moved out the invariant condition.
If we do that we need to extend fuse_loops() to cope with this
by threading through the (still invariant) copied condition
between the two loop copies. */
}
if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i]))
break;
free (bbs);
if (i != n)
return false;
......
......@@ -536,7 +536,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_funroll_and_jam, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_floop_unroll_and_jam, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
......
2017-12-08 Michael Matz <matz@suse.de>
Fix PR tree-optimization/83323
* gcc.dg/pr83323.c: New test.
* gcc.dg/unroll-and-jam.c: Use -floop-unroll-and-jam.
2017-12-08 Richard Biener <rguenther@suse.de>
PR middle-end/81782
......
/* { dg-do run } */
/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0" } */
int x[1024], y[1024];
void __attribute__((noipa)) foo ()
{
for (int i = 0; i < 1024; ++i)
{
x[i] = 0;
for (int j = 0; j < 1024; ++j)
if (!y[j])
x[i] = 1;
}
}
int main()
{
y[1023] = 1;
foo ();
if (x[1] != 1)
__builtin_abort ();
return 0;
}
/* { dg-do run } */
/* { dg-options "-O3 -funroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
/* { dg-require-effective-target int32plus } */
#include <stdio.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment