Commit 519cac4a by Jan Hubicka Committed by Jan Hubicka

cfgloopanal.c (get_loop_hot_path): New function.

	* cfgloopanal.c (get_loop_hot_path): New function.
	* tree-ssa-loop-ivcanon.c (struct loop_size): Add CONSTANT_IV,
	NUM_NON_PURE_CALLS_ON_HOT_PATH, NUM_PURE_CALLS_ON_HOT_PATH,
	NUM_BRANCHES_ON_HOT_PATH.
	(tree_estimate_loop_size): Compute the new values.
	(try_unroll_loop_completely): Disable unrolling of loops with only
	calls or too many branches.
	(tree_unroll_loops_completely): Deal also with outer loops of hot loops.
	* cfgloop.h (get_loop_hot_path): Declare.
	* params.def (PARAM_MAX_PEEL_BRANCHES): New parameter.
	* invoke.texi (max-peel-branches): Document.

	* gcc.dg/tree-ssa/loop-1.c: Make it still look like a good unrolling candidate.
	* gcc.dg/tree-ssa/loop-23.c: Likewise.
	* gcc.dg/tree-ssa/cunroll-1.c: Unrolling now happens early.
	* gcc.dg/tree-prof/unroll-1.c: Remove confused dg-options.

From-SVN: r193246
parent 425b784f
......@@ -714,6 +714,7 @@ extern void doloop_optimize_loops (void);
extern void move_loop_invariants (void);
extern bool finite_loop_p (struct loop *);
extern void scale_loop_profile (struct loop *loop, int scale, int iteration_bound);
extern VEC (basic_block, heap) * get_loop_hot_path (const struct loop *loop);
/* Returns the outermost loop of the loop nest that contains LOOP.*/
static inline struct loop *
......
......@@ -483,3 +483,36 @@ single_likely_exit (struct loop *loop)
VEC_free (edge, heap, exits);
return found;
}
/* Return the most likely ("hot") path through LOOP as a vector of basic
   blocks, starting at the loop header.  From each block we greedily follow
   the highest-probability successor edge that neither exits the loop nor
   revisits an already collected block, stopping when no such edge remains
   or when the best edge would close the cycle back to the header.
   The caller owns the returned vector and must VEC_free it.  */
VEC (basic_block, heap) *
get_loop_hot_path (const struct loop *loop)
{
  basic_block cur = loop->header;
  VEC (basic_block, heap) *hot_path = NULL;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (;;)
    {
      edge e;
      edge_iterator ei;
      edge pref = NULL;

      VEC_safe_push (basic_block, heap, hot_path, cur);
      bitmap_set_bit (seen, cur->index);

      /* Pick the most probable successor staying inside the loop that we
	 have not walked yet.  */
      FOR_EACH_EDGE (e, ei, cur->succs)
	{
	  if (loop_exit_edge_p (loop, e)
	      || bitmap_bit_p (seen, e->dest->index))
	    continue;
	  if (pref == NULL || e->probability > pref->probability)
	    pref = e;
	}

      /* Stop at a dead end or once the path would wrap around.  */
      if (pref == NULL || pref->dest == loop->header)
	break;
      cur = pref->dest;
    }

  BITMAP_FREE (seen);
  return hot_path;
}
......@@ -9085,6 +9085,9 @@ the loop code is peeled.
@item max-peel-times
The maximum number of peelings of a single loop.
@item max-peel-branches
The maximum number of branches on the hot path through the peeled sequence.
@item max-completely-peeled-insns
The maximum number of insns of a completely peeled loop.
......
......@@ -291,6 +291,11 @@ DEFPARAM(PARAM_MAX_PEEL_TIMES,
"max-peel-times",
"The maximum number of peelings of a single loop",
16, 0, 0)
/* The maximum number of peelings of a single loop that is peeled completely. */
DEFPARAM(PARAM_MAX_PEEL_BRANCHES,
"max-peel-branches",
"The maximum number of branches on the path through the peeled sequence",
32, 0, 0)
/* The maximum number of insns of a peeled loop. */
DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS,
"max-completely-peeled-insns",
......
2012-11-06 Jan Hubicka <jh@suse.cz>
	* gcc.dg/tree-ssa/loop-1.c: Make it still look like a good unrolling candidate.
* gcc.dg/tree-ssa/loop-23.c: Likewise.
* gcc.dg/tree-ssa/cunroll-1.c: Unrolling now happens early.
* gcc.dg/tree-prof/unroll-1.c: Remove confused dg-options.
2012-11-06 David Edelsohn <dje.gcc@gmail.com>
* const-uniq-1.c: Expand regex to match AIX XCOFF labels.
......
......@@ -21,4 +21,3 @@ main()
}
/* { dg-final-use { scan-rtl-dump "Considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
/* { dg-final-use { cleanup-rtl-dump "Not unrolling loop, doesn't roll" } } */
/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops -fno-peel-loops" } */
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-cunroll-details" } */
/* { dg-options "-O3 -fdump-tree-cunrolli-details" } */
int a[2];
test(int c)
{
......@@ -10,4 +10,4 @@ test(int c)
/* Array bounds says the loop will not roll much. */
/* { dg-final { scan-tree-dump "Unrolled loop 1 completely .duplicated 1 times.." "cunroll"} } */
/* { dg-final { scan-tree-dump "Last iteration exit edge was proved true." "cunroll"} } */
/* { dg-final { cleanup-tree-dump "cunroll" } } */
/* { dg-final { cleanup-tree-dump "cunrolli" } } */
......@@ -17,13 +17,16 @@
to the load from the GOT this also contains the name of the function so for
each call the function name would appear twice. */
/* { dg-options "-O1 -ftree-loop-ivcanon -funroll-loops -fdump-tree-ivcanon-details -fdump-tree-cunroll-details -fdump-tree-optimized -mno-relax-pic-calls" { target mips*-*-* } } */
void xxx(void)
__attribute__ ((pure))
int foo (int x);
int xxx(void)
{
int x = 45;
int sum;
while (x >>= 1)
foo ();
sum += foo (x) * 2;
return sum;
}
/* We should be able to find out that the loop iterates four times and unroll it completely. */
......
/* { dg-do compile } */
/* { dg-options "-O2 -funroll-loops -fdump-tree-cunroll-details" } */
void bla(int);
__attribute__ ((pure))
int bla(int);
void foo(void)
int foo(void)
{
int i;
int sum;
/* This loop used to appear to be too large for unrolling. */
for (i = 0; i < 4; i++)
{
bla (i);
bla (2*i);
bla (3*i);
bla (4*i);
bla (5*i);
bla (6*i);
bla (7*i);
bla (8*i);
sum += bla (i);
sum += bla (2*i);
sum += bla (3*i);
sum += bla (4*i);
sum += bla (5*i);
sum += bla (6*i);
sum += bla (7*i);
sum += bla (8*i);
}
return sum;
}
/* { dg-final { scan-tree-dump-times "Unrolled loop 1 completely" 1 "cunroll" } } */
......
......@@ -140,6 +140,20 @@ struct loop_size
instructions after exit are not executed. */
int last_iteration;
int last_iteration_eliminated_by_peeling;
/* If some IV computation will become constant. */
bool constant_iv;
/* Number of call stmts that are not a builtin and are pure or const
present on the hot path. */
int num_pure_calls_on_hot_path;
/* Number of call stmts that are not a builtin and are not pure nor const
present on the hot path. */
int num_non_pure_calls_on_hot_path;
/* Number of statements other than calls in the loop. */
int non_call_stmts_on_hot_path;
/* Number of branches seen on the hot path. */
int num_branches_on_hot_path;
};
/* Return true if OP in STMT will be constant after peeling LOOP. */
......@@ -188,7 +202,11 @@ constant_after_peeling (tree op, gimple stmt, struct loop *loop)
return true;
}
/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
/* Computes an estimated number of insns in LOOP.
EXIT (if non-NULL) is an exit edge that will be eliminated in all but the last
iteration of the loop.
EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration
of the loop.
Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. */
static void
......@@ -198,11 +216,17 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
gimple_stmt_iterator gsi;
unsigned int i;
bool after_exit;
VEC (basic_block, heap) *path = get_loop_hot_path (loop);
size->overall = 0;
size->eliminated_by_peeling = 0;
size->last_iteration = 0;
size->last_iteration_eliminated_by_peeling = 0;
size->num_pure_calls_on_hot_path = 0;
size->num_non_pure_calls_on_hot_path = 0;
size->non_call_stmts_on_hot_path = 0;
size->num_branches_on_hot_path = 0;
size->constant_iv = 0;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
......@@ -221,6 +245,8 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
gimple stmt = gsi_stmt (gsi);
int num = estimate_num_insns (stmt, &eni_size_weights);
bool likely_eliminated = false;
bool likely_eliminated_last = false;
bool likely_eliminated_peeled = false;
if (dump_file && (dump_flags & TDF_DETAILS))
{
......@@ -231,11 +257,21 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
/* Look for reasons why we might optimize this stmt away. */
/* Exit conditional. */
if (exit && body[i] == exit->src && stmt == last_stmt (exit->src))
if (exit && body[i] == exit->src
&& stmt == last_stmt (exit->src))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Exit condition will be eliminated.\n");
likely_eliminated = true;
fprintf (dump_file, " Exit condition will be eliminated "
"in peeled copies.\n");
likely_eliminated_peeled = true;
}
else if (edge_to_cancel && body[i] == edge_to_cancel->src
&& stmt == last_stmt (edge_to_cancel->src))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Exit condition will be eliminated "
"in last copy.\n");
likely_eliminated_last = true;
}
/* Sets of IV variables */
else if (gimple_code (stmt) == GIMPLE_ASSIGN
......@@ -249,19 +285,22 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
/* Assignments of IV variables. */
else if (gimple_code (stmt) == GIMPLE_ASSIGN
&& TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
&& constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
&& constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
&& (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
|| constant_after_peeling (gimple_assign_rhs2 (stmt),
stmt, loop)))
{
size->constant_iv = true;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Constant expression will be folded away.\n");
likely_eliminated = true;
}
/* Conditionals. */
else if (gimple_code (stmt) == GIMPLE_COND
&& constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
&& constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
else if ((gimple_code (stmt) == GIMPLE_COND
&& constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
&& constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
|| (gimple_code (stmt) == GIMPLE_SWITCH
&& constant_after_peeling (gimple_switch_index (stmt), stmt, loop)))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Constant conditional.\n");
......@@ -269,16 +308,49 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru
}
size->overall += num;
if (likely_eliminated)
if (likely_eliminated || likely_eliminated_peeled)
size->eliminated_by_peeling += num;
if (!after_exit)
{
size->last_iteration += num;
if (likely_eliminated)
if (likely_eliminated || likely_eliminated_last)
size->last_iteration_eliminated_by_peeling += num;
}
}
}
while (VEC_length (basic_block, path))
{
basic_block bb = VEC_pop (basic_block, path);
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
if (gimple_code (stmt) == GIMPLE_CALL)
{
int flags = gimple_call_flags (stmt);
tree decl = gimple_call_fndecl (stmt);
if (decl && DECL_IS_BUILTIN (decl)
&& is_inexpensive_builtin (decl))
;
else if (flags & (ECF_PURE | ECF_CONST))
size->num_pure_calls_on_hot_path++;
else
size->num_non_pure_calls_on_hot_path++;
size->num_branches_on_hot_path ++;
}
else if (gimple_code (stmt) != GIMPLE_CALL
&& gimple_code (stmt) != GIMPLE_DEBUG)
size->non_call_stmts_on_hot_path++;
if (((gimple_code (stmt) == GIMPLE_COND
&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
|| constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)))
|| (gimple_code (stmt) == GIMPLE_SWITCH
&& !constant_after_peeling (gimple_switch_index (stmt), stmt, loop)))
&& (!exit || bb != exit->src))
size->num_branches_on_hot_path++;
}
}
VEC_free (basic_block, heap, path);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
size->eliminated_by_peeling, size->last_iteration,
......@@ -644,34 +716,85 @@ try_unroll_loop_completely (struct loop *loop,
(int) unr_insns);
}
/* If the code is going to shrink, we don't need to be extra cautious
on guessing if the unrolling is going to be profitable. */
if (unr_insns
/* If there is IV variable that will become constant, we save
one instruction in the loop prologue we do not account
otherwise. */
<= ninsns + (size.constant_iv != false))
;
/* We unroll only inner loops, because we do not consider it profitable
otherwise. We still can cancel the loopback edge of a non-rolling loop;
this is always a good idea. */
if (loop->inner && unr_insns > ninsns)
else if (ul == UL_NO_GROWTH)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
loop->num);
return false;
}
/* Outer loops tend to be less interesting candidates for complete
unrolling unless we can do a lot of propagation into the inner loop
body. For now we disable outer loop unrolling when the code would
grow. */
else if (loop->inner)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d:"
fprintf (dump_file, "Not unrolling loop %d: "
"it is not innermost and code would grow.\n",
loop->num);
return false;
}
if (unr_insns > ninsns
&& (unr_insns
> (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
/* If there is call on a hot path through the loop, then
there is most probably not much to optimize. */
else if (size.num_non_pure_calls_on_hot_path)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d "
"(--param max-completely-peeled-insns limit reached).\n",
fprintf (dump_file, "Not unrolling loop %d: "
"contains call and code would grow.\n",
loop->num);
return false;
}
if (ul == UL_NO_GROWTH
&& unr_insns > ninsns)
/* If there is pure/const call in the function, then we
can still optimize the unrolled loop body if it contains
some other interesting code than the calls and code
storing or cumulating the return value. */
else if (size.num_pure_calls_on_hot_path
/* One IV increment, one test, one ivtmp store
and one useful stmt. That is about the minimal loop
doing a pure call. */
&& (size.non_call_stmts_on_hot_path
<= 3 + size.num_pure_calls_on_hot_path))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
fprintf (dump_file, "Not unrolling loop %d: "
"contains just pure calls and code would grow.\n",
loop->num);
return false;
}
/* Complete unrolling is a major win when control flow is removed and
one big basic block is created. If the loop contains control flow
the optimization may still be a win because of eliminating the loop
overhead but it also may blow the branch predictor tables.
Limit the number of branches on the hot path through the peeled
sequence. */
else if (size.num_branches_on_hot_path * (int)n_unroll
> PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d: "
" number of branches on hot path in the unrolled sequence"
" reach --param max-peel-branches limit.\n",
loop->num);
return false;
}
else if (unr_insns
> (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d: "
"(--param max-completely-peeled-insns limit reached).\n",
loop->num);
return false;
}
......@@ -689,6 +812,8 @@ try_unroll_loop_completely (struct loop *loop,
{
free_original_copy_tables ();
free (wont_exit);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Failed to duplicate the loop\n");
return false;
}
......@@ -968,7 +1093,7 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
struct loop *loop_father = loop_outer (loop);
if (may_increase_size && optimize_loop_for_speed_p (loop)
if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
/* Unroll outermost loops only if asked to do so or they do
not cause code growth. */
&& (unroll_outer || loop_outer (loop_father)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment