Commit f8934be7 by Jan Hubicka Committed by Jan Hubicka

loop-unroll.c: (decide_unrolling_and_peeling): Rename to


	* loop-unroll.c: (decide_unrolling_and_peeling): Rename to
	(decide_unrolling): ... this one.
	(peel_loops_completely): Remove.
	(decide_peel_simple): Remove.
	(decide_peel_once_rolling): Remove.
	(decide_peel_completely): Remove.
	(peel_loop_simple): Remove.
	(peel_loop_completely): Remove.
	(unroll_and_peel_loops): Rename to ...
	(unroll_loops): ... this one; handle only unrolling.
	* cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and
	LPT_PEEL_SIMPLE.
	(UAP_PEEL): Remove.
	(unroll_and_peel_loops): Remove.
	(unroll_loops): New.
	* passes.def: Replace
	pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops.
	* loop-init.c (gate_rtl_unroll_and_peel_loops,
	rtl_unroll_and_peel_loops): Rename to ...
	(gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update.
	(pass_rtl_unroll_and_peel_loops): Rename to ...
	(pass_rtl_unroll_loops): ... this one.
	* tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove.
	(make_pass_rtl_unroll_loops): New.
	* tree-ssa-loop-ivcanon.c: (estimated_peeled_sequence_size, try_peel_loop): New.
	(canonicalize_loop_induction_variables): Update.

	* gcc.dg/tree-prof/peel-1.c: Update.
	* gcc.dg/tree-prof/unroll-1.c: Update.
	* gcc.dg/gcc.dg/unroll_1.c: Update.
	* gcc.dg/gcc.dg/unroll_2.c: Update.
	* gcc.dg/gcc.dg/unroll_3.c: Update.
	* gcc.dg/gcc.dg/unroll_4.c: Update.

From-SVN: r216238
parent fa7fa585
2014-10-14 Jan Hubicka <hubicka@ucw.cz>
* loop-unroll.c: (decide_unrolling_and_peeling): Rename to
(decide_unrolling): ... this one.
(peel_loops_completely): Remove.
(decide_peel_simple): Remove.
(decide_peel_once_rolling): Remove.
(decide_peel_completely): Remove.
(peel_loop_simple): Remove.
(peel_loop_completely): Remove.
(unroll_and_peel_loops): Rename to ...
(unroll_loops): ... this one; handle only unrolling.
* cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and
LPT_PEEL_SIMPLE.
(UAP_PEEL): Remove.
(unroll_and_peel_loops): Remove.
(unroll_loops): New.
* passes.def: Replace
pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops.
* loop-init.c (gate_rtl_unroll_and_peel_loops,
rtl_unroll_and_peel_loops): Rename to ...
(gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update.
(pass_rtl_unroll_and_peel_loops): Rename to ...
(pass_rtl_unroll_loops): ... this one.
* tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove.
(make_pass_rtl_unroll_loops): New.
* tree-ssa-loop-ivcanon.c: (estimated_peeled_sequence_size, try_peel_loop): New.
(canonicalize_loop_induction_variables): Update.
2014-10-14 Max Filippov <jcmvbkbc@gmail.com>
* config/xtensa/xtensa.h (TARGET_HARD_FLOAT_POSTINC): new macro.
......@@ -30,8 +30,6 @@ along with GCC; see the file COPYING3. If not see
enum lpt_dec
{
LPT_NONE,
LPT_PEEL_COMPLETELY,
LPT_PEEL_SIMPLE,
LPT_UNROLL_CONSTANT,
LPT_UNROLL_RUNTIME,
LPT_UNROLL_STUPID
......@@ -731,12 +729,11 @@ extern void loop_optimizer_finalize (void);
/* Optimization passes. */
enum
{
UAP_PEEL = 1, /* Enables loop peeling. */
UAP_UNROLL = 2, /* Enables unrolling of loops if it seems profitable. */
UAP_UNROLL_ALL = 4 /* Enables unrolling of all loops. */
UAP_UNROLL = 1, /* Enables unrolling of loops if it seems profitable. */
UAP_UNROLL_ALL = 2 /* Enables unrolling of all loops. */
};
extern void unroll_and_peel_loops (int);
extern void unroll_loops (int);
extern void doloop_optimize_loops (void);
extern void move_loop_invariants (void);
extern void scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound);
......
......@@ -357,7 +357,6 @@ pass_loop2::gate (function *fun)
if (optimize > 0
&& (flag_move_loop_invariants
|| flag_unswitch_loops
|| flag_peel_loops
|| flag_unroll_loops
#ifdef HAVE_doloop_end
|| (flag_branch_on_count_reg && HAVE_doloop_end)
......@@ -537,7 +536,7 @@ make_pass_rtl_move_loop_invariants (gcc::context *ctxt)
namespace {
const pass_data pass_data_rtl_unroll_and_peel_loops =
const pass_data pass_data_rtl_unroll_loops =
{
RTL_PASS, /* type */
"loop2_unroll", /* name */
......@@ -550,11 +549,11 @@ const pass_data pass_data_rtl_unroll_and_peel_loops =
0, /* todo_flags_finish */
};
class pass_rtl_unroll_and_peel_loops : public rtl_opt_pass
class pass_rtl_unroll_loops : public rtl_opt_pass
{
public:
pass_rtl_unroll_and_peel_loops (gcc::context *ctxt)
: rtl_opt_pass (pass_data_rtl_unroll_and_peel_loops, ctxt)
pass_rtl_unroll_loops (gcc::context *ctxt)
: rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt)
{}
/* opt_pass methods: */
......@@ -565,10 +564,10 @@ public:
virtual unsigned int execute (function *);
}; // class pass_rtl_unroll_and_peel_loops
}; // class pass_rtl_unroll_loops
unsigned int
pass_rtl_unroll_and_peel_loops::execute (function *fun)
pass_rtl_unroll_loops::execute (function *fun)
{
if (number_of_loops (fun) > 1)
{
......@@ -576,14 +575,12 @@ pass_rtl_unroll_and_peel_loops::execute (function *fun)
if (dump_file)
df_dump (dump_file);
if (flag_peel_loops)
flags |= UAP_PEEL;
if (flag_unroll_loops)
flags |= UAP_UNROLL;
if (flag_unroll_all_loops)
flags |= UAP_UNROLL_ALL;
unroll_and_peel_loops (flags);
unroll_loops (flags);
}
return 0;
}
......@@ -591,9 +588,9 @@ pass_rtl_unroll_and_peel_loops::execute (function *fun)
} // anon namespace
rtl_opt_pass *
make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt)
make_pass_rtl_unroll_loops (gcc::context *ctxt)
{
return new pass_rtl_unroll_and_peel_loops (ctxt);
return new pass_rtl_unroll_loops (ctxt);
}
......
......@@ -359,7 +359,7 @@ along with GCC; see the file COPYING3. If not see
PUSH_INSERT_PASSES_WITHIN (pass_loop2)
NEXT_PASS (pass_rtl_loop_init);
NEXT_PASS (pass_rtl_move_loop_invariants);
NEXT_PASS (pass_rtl_unroll_and_peel_loops);
NEXT_PASS (pass_rtl_unroll_loops);
NEXT_PASS (pass_rtl_doloop);
NEXT_PASS (pass_rtl_loop_done);
TERMINATE_PASS_LIST ()
......
2014-10-14 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/tree-prof/peel-1.c: Update.
* gcc.dg/tree-prof/unroll-1.c: Update.
* gcc.dg/gcc.dg/unroll_1.c: Update.
* gcc.dg/gcc.dg/unroll_2.c: Update.
* gcc.dg/gcc.dg/unroll_3.c: Update.
* gcc.dg/gcc.dg/unroll_4.c: Update.
2014-10-14 DJ Delorie <dj@redhat.com>
* g++.dg/abi/mangle64.C: New.
......
/* { dg-options "-O3 -fdump-rtl-loop2_unroll -fno-unroll-loops -fpeel-loops" } */
/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */
void abort();
int a[1000];
......
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll=stderr -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2 -fenable-rtl-loop2_unroll" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details=stderr -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli" } */
unsigned a[100], b[100];
inline void bar()
......@@ -11,7 +11,7 @@ int foo(void)
{
int i;
bar();
for (i = 0; i < 2; i++) /* { dg-message "note: loop turned into non-loop; it never loops" } */
for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
{
a[i]= b[i] + 1;
}
......@@ -21,7 +21,7 @@ int foo(void)
int foo2(void)
{
int i;
for (i = 0; i < 2; i++) /* { dg-message "note: loop turned into non-loop; it never loops" } */
for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
{
a[i]= b[i] + 1;
}
......
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll=foo -fdisable-tree-cunrolli=foo -fenable-rtl-loop2_unroll" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
unsigned a[100], b[100];
inline void bar()
......@@ -28,5 +28,5 @@ int foo2(void)
return 1;
}
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { cleanup-tree-dump "cunrolli" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2_unroll=foo" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
unsigned a[100], b[100];
inline void bar()
......@@ -28,5 +28,5 @@ int foo2(void)
return 1;
}
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { cleanup-tree-dump "cunrolli" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fdisable-tree-cunrolli -fenable-rtl-loop2_unroll=foo2" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
unsigned a[100], b[100];
inline void bar()
......@@ -28,5 +28,5 @@ int foo2(void)
return 1;
}
/* { dg-final { scan-rtl-dump-times "loop turned into non-loop; it never loops" 1 "loop2_unroll" } } */
/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { cleanup-tree-dump "cunrolli" } } */
......@@ -504,7 +504,7 @@ extern rtl_opt_pass *make_pass_outof_cfg_layout_mode (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_unroll_loops (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt);
......
......@@ -28,9 +28,12 @@ along with GCC; see the file COPYING3. If not see
variables. In that case the created optimization possibilities are likely
to pay off.
Additionally in case we detect that it is beneficial to unroll the
loop completely, we do it right here to expose the optimization
possibilities to the following passes. */
We also perform
- complete unrolling (or peeling) when the loop rolls few enough
times
- simple peeling (i.e. copying a few initial iterations prior to the loop)
when an estimate of the number of iterations is known (typically from
profile info). */
#include "config.h"
#include "system.h"
......@@ -657,11 +660,12 @@ try_unroll_loop_completely (struct loop *loop,
HOST_WIDE_INT maxiter,
location_t locus)
{
unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
unsigned HOST_WIDE_INT n_unroll = 0, ninsns, max_unroll, unr_insns;
gimple cond;
struct loop_size size;
bool n_unroll_found = false;
edge edge_to_cancel = NULL;
int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
/* See if we proved number of iterations to be low constant.
......@@ -821,6 +825,8 @@ try_unroll_loop_completely (struct loop *loop,
loop->num);
return false;
}
dump_printf_loc (report_flags, locus,
"loop turned into non-loop; it never loops.\n");
initialize_original_copy_tables ();
wont_exit = sbitmap_alloc (n_unroll + 1);
......@@ -902,6 +908,133 @@ try_unroll_loop_completely (struct loop *loop,
return true;
}
/* Estimate the number of instructions in a sequence of NPEEL peeled
   copies of the loop described by SIZE.  Each copy is charged
   SIZE->overall minus SIZE->eliminated_by_peeling instructions; the
   result is never smaller than 1.  */

static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
                                unsigned HOST_WIDE_INT npeel)
{
  /* The product is computed in unsigned HOST_WIDE_INT, exactly as the
     multiplication of NPEEL by the signed per-copy cost dictates.  */
  unsigned HOST_WIDE_INT peeled_insns
    = npeel * (HOST_WIDE_INT) (size->overall - size->eliminated_by_peeling);

  if (peeled_insns > 1)
    return peeled_insns;
  return 1;
}
/* If the loop is expected to iterate N times and is
small enough, duplicate the loop body N+1 times before
the loop itself. This way the hot path will never
enter the loop.
Parameters are the same as for try_unroll_loops_completely */
static bool
try_peel_loop (struct loop *loop,
edge exit, tree niter,
HOST_WIDE_INT maxiter)
{
int npeel;
struct loop_size size;
int peeled_size;
sbitmap wont_exit;
unsigned i;
vec<edge> to_remove = vNULL;
edge e;
/* If the iteration bound is known and large, then we can safely eliminate
the check in peeled copies. */
if (TREE_CODE (niter) != INTEGER_CST)
exit = NULL;
if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
return false;
/* Peel only innermost loops. */
if (loop->inner)
{
if (dump_file)
fprintf (dump_file, "Not peeling: outer loop\n");
return false;
}
if (!optimize_loop_for_speed_p (loop))
{
if (dump_file)
fprintf (dump_file, "Not peeling: cold loop\n");
return false;
}
/* Check if there is an estimate on the number of iterations. */
npeel = estimated_loop_iterations_int (loop);
if (npeel < 0)
{
if (dump_file)
fprintf (dump_file, "Not peeling: number of iterations is not "
"estimated\n");
return false;
}
if (maxiter >= 0 && maxiter <= npeel)
{
if (dump_file)
fprintf (dump_file, "Not peeling: upper bound is known so can "
"unroll complettely\n");
return false;
}
/* We want to peel estimated number of iterations + 1 (so we never
enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
and be sure to avoid overflows. */
if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
{
if (dump_file)
fprintf (dump_file, "Not peeling: rolls too much "
"(%i + 1 > --param max-peel-times)\n", npeel);
return false;
}
npeel++;
/* Check peeled loops size. */
tree_estimate_loop_size (loop, exit, NULL, &size,
PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
> PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
{
if (dump_file)
fprintf (dump_file, "Not peeling: peeled sequence size is too large "
"(%i insns > --param max-peel-insns)", peeled_size);
return false;
}
/* Duplicate possibly eliminating the exits. */
initialize_original_copy_tables ();
wont_exit = sbitmap_alloc (npeel + 1);
bitmap_ones (wont_exit);
bitmap_clear_bit (wont_exit, 0);
if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
npeel, wont_exit,
exit, &to_remove,
DLTHE_FLAG_UPDATE_FREQ
| DLTHE_FLAG_COMPLETTE_PEEL))
{
free_original_copy_tables ();
free (wont_exit);
return false;
}
FOR_EACH_VEC_ELT (to_remove, i, e)
{
bool ok = remove_path (e);
gcc_assert (ok);
}
free (wont_exit);
free_original_copy_tables ();
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Peeled loop %d, %i times.\n",
loop->num, npeel);
}
if (loop->any_upper_bound)
loop->nb_iterations_upper_bound -= npeel;
loop->nb_iterations_estimate = 0;
/* Make sure to mark loop cold so we do not try to peel it more. */
scale_loop_profile (loop, 1, 0);
loop->header->count = 0;
return true;
}
/* Adds a canonical induction variable to LOOP if suitable.
CREATE_IV is true if we may create a new iv. UL determines
which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
......@@ -981,6 +1114,9 @@ canonicalize_loop_induction_variables (struct loop *loop,
&& exit && just_once_each_iteration_p (loop, exit->src))
create_canonical_iv (loop, exit, niter);
if (ul == UL_ALL)
modified |= try_peel_loop (loop, exit, niter, maxiter);
return modified;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment