Commit 28df8730 by Michael Matz Committed by Michael Matz

loop splitting

(approved at https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00648.html )

	* common.opt (-fsplit-loops): New flag.
	* passes.def (pass_loop_split): Add.
	* opts.c (default_options_table): Add OPT_fsplit_loops entry at -O3.
	(enable_fdo_optimizations): Add loop splitting.
	* timevar.def (TV_LOOP_SPLIT): Add.
	* tree-pass.h (make_pass_loop_split): Declare.
	* tree-ssa-loop-manip.h (rewrite_into_loop_closed_ssa_1): Declare.
	* tree-ssa-loop-unswitch.c: Include tree-ssa-loop-manip.h.
	* tree-ssa-loop-split.c: New file.
	* Makefile.in (OBJS): Add tree-ssa-loop-split.o.
	* doc/invoke.texi (fsplit-loops): Document.
	* doc/passes.texi (Loop optimization): Add paragraph about loop
	splitting.

testsuite/
	* gcc.dg/loop-split.c: New test.
	* gcc.dg/tree-ssa/ifc-9.c: Update.
	* gcc.dg/tree-ssa/ifc-10.c: Update.

From-SVN: r241374
parent a2e0054e
2016-10-20 Michael Matz <matz@suse.de>
Loop splitting.
* common.opt (-fsplit-loops): New flag.
* passes.def (pass_loop_split): Add.
* opts.c (default_options_table): Add OPT_fsplit_loops entry at -O3.
(enable_fdo_optimizations): Add loop splitting.
* timevar.def (TV_LOOP_SPLIT): Add.
* tree-pass.h (make_pass_loop_split): Declare.
* tree-ssa-loop-manip.h (rewrite_into_loop_closed_ssa_1): Declare.
* tree-ssa-loop-unswitch.c: Include tree-ssa-loop-manip.h.
* tree-ssa-loop-split.c: New file.
* Makefile.in (OBJS): Add tree-ssa-loop-split.o.
* doc/invoke.texi (fsplit-loops): Document.
* doc/passes.texi (Loop optimization): Add paragraph about loop
splitting.
2016-10-20 Richard Biener <rguenther@suse.de> 2016-10-20 Richard Biener <rguenther@suse.de>
* cgraphunit.c (analyze_functions): Set node->definition to * cgraphunit.c (analyze_functions): Set node->definition to
......
...@@ -1508,6 +1508,7 @@ OBJS = \ ...@@ -1508,6 +1508,7 @@ OBJS = \
tree-ssa-loop-manip.o \ tree-ssa-loop-manip.o \
tree-ssa-loop-niter.o \ tree-ssa-loop-niter.o \
tree-ssa-loop-prefetch.o \ tree-ssa-loop-prefetch.o \
tree-ssa-loop-split.o \
tree-ssa-loop-unswitch.o \ tree-ssa-loop-unswitch.o \
tree-ssa-loop.o \ tree-ssa-loop.o \
tree-ssa-math-opts.o \ tree-ssa-math-opts.o \
......
...@@ -2576,6 +2576,10 @@ funswitch-loops ...@@ -2576,6 +2576,10 @@ funswitch-loops
Common Report Var(flag_unswitch_loops) Optimization Common Report Var(flag_unswitch_loops) Optimization
Perform loop unswitching. Perform loop unswitching.
fsplit-loops
Common Report Var(flag_split_loops) Optimization
Perform loop splitting.
funwind-tables funwind-tables
Common Report Var(flag_unwind_tables) Optimization Common Report Var(flag_unwind_tables) Optimization
Just generate unwind tables for exception handling. Just generate unwind tables for exception handling.
......
...@@ -402,7 +402,7 @@ Objective-C and Objective-C++ Dialects}. ...@@ -402,7 +402,7 @@ Objective-C and Objective-C++ Dialects}.
-fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol -fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
-fsemantic-interposition -fshrink-wrap -fshrink-wrap-separate @gol -fsemantic-interposition -fshrink-wrap -fshrink-wrap-separate @gol
-fsignaling-nans @gol -fsignaling-nans @gol
-fsingle-precision-constant -fsplit-ivs-in-unroller @gol -fsingle-precision-constant -fsplit-ivs-in-unroller -fsplit-loops@gol
-fsplit-paths @gol -fsplit-paths @gol
-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol -fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
-fstdarg-opt -fstrict-aliasing @gol -fstdarg-opt -fstrict-aliasing @gol
...@@ -9224,6 +9224,11 @@ Enabled with @option{-O3} and/or @option{-fprofile-use}. ...@@ -9224,6 +9224,11 @@ Enabled with @option{-O3} and/or @option{-fprofile-use}.
Enables the loop invariant motion pass in the RTL loop optimizer. Enabled Enables the loop invariant motion pass in the RTL loop optimizer. Enabled
at level @option{-O1} at level @option{-O1}
@item -fsplit-loops
@opindex fsplit-loops
Split a loop into two if it contains a condition that's always true
for one side of the iteration space and false for the other.
@item -funswitch-loops @item -funswitch-loops
@opindex funswitch-loops @opindex funswitch-loops
Move branches with loop invariant conditions out of the loop, with duplicates Move branches with loop invariant conditions out of the loop, with duplicates
......
...@@ -484,6 +484,12 @@ out of the loops. To achieve this, a duplicate of the loop is created for ...@@ -484,6 +484,12 @@ out of the loops. To achieve this, a duplicate of the loop is created for
each possible outcome of conditional jump(s). The pass is implemented in each possible outcome of conditional jump(s). The pass is implemented in
@file{tree-ssa-loop-unswitch.c}. @file{tree-ssa-loop-unswitch.c}.
Loop splitting.  If a loop contains a conditional statement that is
always true for one part of the iteration space and false for the other,
this pass splits the loop into two: one loop handles the first part and
the other handles the second, thereby removing one inner-loop conditional.
The pass is implemented in @file{tree-ssa-loop-split.c}.
The optimizations also use various utility functions contained in The optimizations also use various utility functions contained in
@file{tree-ssa-loop-manip.c}, @file{cfgloop.c}, @file{cfgloopanal.c} and @file{tree-ssa-loop-manip.c}, @file{cfgloop.c}, @file{cfgloopanal.c} and
@file{cfgloopmanip.c}. @file{cfgloopmanip.c}.
......
...@@ -530,6 +530,7 @@ static const struct default_options default_options_table[] = ...@@ -530,6 +530,7 @@ static const struct default_options default_options_table[] =
regardless of them being declared inline. */ regardless of them being declared inline. */
{ OPT_LEVELS_3_PLUS_AND_SIZE, OPT_finline_functions, NULL, 1 }, { OPT_LEVELS_3_PLUS_AND_SIZE, OPT_finline_functions, NULL, 1 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
...@@ -1427,6 +1428,8 @@ enable_fdo_optimizations (struct gcc_options *opts, ...@@ -1427,6 +1428,8 @@ enable_fdo_optimizations (struct gcc_options *opts,
opts->x_flag_ipa_bit_cp = value; opts->x_flag_ipa_bit_cp = value;
if (!opts_set->x_flag_predictive_commoning) if (!opts_set->x_flag_predictive_commoning)
opts->x_flag_predictive_commoning = value; opts->x_flag_predictive_commoning = value;
if (!opts_set->x_flag_split_loops)
opts->x_flag_split_loops = value;
if (!opts_set->x_flag_unswitch_loops) if (!opts_set->x_flag_unswitch_loops)
opts->x_flag_unswitch_loops = value; opts->x_flag_unswitch_loops = value;
if (!opts_set->x_flag_gcse_after_reload) if (!opts_set->x_flag_gcse_after_reload)
......
...@@ -269,6 +269,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -269,6 +269,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_tree_loop_init); NEXT_PASS (pass_tree_loop_init);
NEXT_PASS (pass_tree_unswitch); NEXT_PASS (pass_tree_unswitch);
NEXT_PASS (pass_scev_cprop); NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_loop_split);
NEXT_PASS (pass_record_bounds); NEXT_PASS (pass_record_bounds);
NEXT_PASS (pass_loop_distribution); NEXT_PASS (pass_loop_distribution);
NEXT_PASS (pass_copy_prop); NEXT_PASS (pass_copy_prop);
......
2016-10-20 Michael Matz <matz@suse.de>
* gcc.dg/loop-split.c: New test.
* gcc.dg/tree-ssa/ifc-9.c: Update.
* gcc.dg/tree-ssa/ifc-10.c: Update.
2016-10-20 Richard Biener <rguenther@suse.de> 2016-10-20 Richard Biener <rguenther@suse.de>
* gcc.dg/graphite/pr35356-1.c: Update. * gcc.dg/graphite/pr35356-1.c: Update.
......
/* { dg-do run } */
/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
#ifdef __cplusplus
extern "C" int printf (const char *, ...);
extern "C" void abort (void);
#else
extern int printf (const char *, ...);
extern void abort (void);
#endif
/* Define TRACE to 1 or 2 to get detailed tracing.
Define SINGLE_TEST to 1 or 2 to get a simple routine with
just one loop, called only one time or with multiple parameters,
to make debugging easier. */
#ifndef TRACE
#define TRACE 0
#endif
/* Run one test loop and fold its result into the running checksum.
   The macro expands in a scope that must already provide the variables
   sum, i, j, a, b and check; it is not self-contained.

   BEG and BEG2 are the initial values of i and j, STEP is added to both
   on every iteration, COND1 is the loop continuation condition and COND2
   selects between accumulating a[i] (true) and b[i] (false) into sum.
   After the loop, check is updated to check * 47 + sum.  With TRACE > 1
   every iteration is printed, with TRACE > 0 the final sum is printed.  */
#define loop(beg,step,beg2,cond1,cond2) \
do \
{ \
sum = 0; \
for (i = (beg), j = (beg2); (cond1); i+=(step),j+=(step)) \
{ \
if (cond2) { \
if (TRACE > 1) printf ("a: %d %d\n", i, j); \
sum += a[i]; \
} else { \
if (TRACE > 1) printf ("b: %d %d\n", i, j); \
sum += b[i]; \
} \
} \
if (TRACE > 0) printf ("sum: %d\n", sum); \
check = check * 47 + sum; \
} while (0)
#ifndef SINGLE_TEST
/* Execute sixteen instances of the `loop' macro over A and B and return
   the combined checksum.

   The first eight loops count i upwards from BEG with a step of 1,
   the last eight count i downwards from END with a step of -1.  Each
   group covers a strict and a non-strict exit test on i combined with
   the four comparisons <, <=, >, >= of j against C.  j starts at BEG2
   for the upward loops and at BEG2 + (END - BEG) for the downward ones.

   The STEP parameter is not read by the body; every loop passes a
   literal step to the macro.  */
unsigned __attribute__((noinline, noclone)) dotest (int beg, int end, int step,
int c, int *a, int *b, int beg2)
{
unsigned check = 0;
int sum;
int i, j;
/* Upward loops: i starts at beg, j at beg2, both incremented by 1.  */
loop (beg, 1, beg2, i < end, j < c);
loop (beg, 1, beg2, i <= end, j < c);
loop (beg, 1, beg2, i < end, j <= c);
loop (beg, 1, beg2, i <= end, j <= c);
loop (beg, 1, beg2, i < end, j > c);
loop (beg, 1, beg2, i <= end, j > c);
loop (beg, 1, beg2, i < end, j >= c);
loop (beg, 1, beg2, i <= end, j >= c);
/* Shift the start of j by the length of the i range so that it is
   offset from end the same way it was offset from beg above.  */
beg2 += end-beg;
/* Downward loops: i starts at end, j at the adjusted beg2, both
   decremented by 1.  */
loop (end, -1, beg2, i >= beg, j >= c);
loop (end, -1, beg2, i >= beg, j > c);
loop (end, -1, beg2, i > beg, j >= c);
loop (end, -1, beg2, i > beg, j > c);
loop (end, -1, beg2, i >= beg, j <= c);
loop (end, -1, beg2, i >= beg, j < c);
loop (end, -1, beg2, i > beg, j <= c);
loop (end, -1, beg2, i > beg, j < c);
return check;
}
#else
/* Single-loop debugging routine, compiled instead of dotest when
   SINGLE_TEST is defined.

   Walks i downwards from END while i > BEG, with j starting at
   BEG2 + (END - BEG) and decremented alongside, and prints "a: i j" for
   every iteration where j >= C.  sum is never modified in the active
   code, so the function returns 0.  STEP, A and B are only referenced
   from commented-out code.  */
int __attribute__((noinline, noclone)) f (int beg, int end, int step,
int c, int *a, int *b, int beg2)
{
int sum = 0;
int i, j;
/* Alternative upward form of the loop, kept for manual experiments.  */
//for (i = beg, j = beg2; i < end; i += 1, j++ /*step*/)
for (i = end, j = beg2 + (end-beg); i > beg; i += -1, j-- /*step*/)
{
/* Scratch derivation: expressing the exit test on i as a bound on j.  */
// i - j == X --> i = X + j
// --> i < end == X+j < end == j < end - X
// --> newend = end - (i_init - j_init)
// j < end-X && j < c --> j < min(end-X,c)
// j < end-X && j <= c --> j <= min(end-X-1,c) or j < min(end-X,c+1{OF!})
//if (j < c)
if (j >= c)
printf ("a: %d %d\n", i, j);
/*else
printf ("b: %d %d\n", i, j);*/
/*sum += a[i];
else
sum += b[i];*/
}
return sum;
}
/* Pointer-based variant of the single-loop debugging routine, compiled
   only when SINGLE_TEST is defined.

   Walks i upwards from BEG while i < END, with j starting at BEG2 and
   advanced alongside, and prints the offsets of i and j relative to BEG
   for every iteration where j <= C.  All pointers are expected to point
   into the same array so that the comparisons and differences are
   well defined.  sum is never modified in the active code, so the
   function returns 0.  STEP, A and B are only referenced from
   commented-out code.  */
int __attribute__((noinline, noclone)) f2 (int *beg, int *end, int step,
					   int *c, int *a, int *b, int *beg2)
{
  int sum = 0;
  int *i, *j;
  for (i = beg, j = beg2; i < end; i += 1, j++ /*step*/)
    {
      if (j <= c)
	/* Pointer differences have type ptrdiff_t; convert them so the
	   arguments match the %d conversions.  */
	printf ("%d %d\n", (int) (i - beg), (int) (j - beg));
      /*sum += a[i];
      else
	sum += b[i];*/
    }
  return sum;
}
#endif
extern int printf (const char *, ...);
/* Test driver.  In the default configuration it calls dotest for every
   combination of diff in [-5, 5] and c in [-1, 10], folds the results
   into a checksum and aborts if the checksum differs from the expected
   constant.  With SINGLE_TEST defined it calls the debugging routine f
   instead.  */
int main ()
{
/* Nine payload values with five zero elements on either side; the
   callees receive a+5 and b+5, i.e. a pointer to the first payload
   element.  */
int a[] = {0,0,0,0,0, 1,2,3,4,5,6,7,8,9, 0,0,0,0,0};
int b[] = {0,0,0,0,0, -1,-2,-3,-4,-5,-6,-7,-8,-9, 0,0,0,0,0,};
int c;
int diff = 0;
unsigned check = 0;
/* SINGLE_TEST == 1: one call to f with fixed arguments, then exit.  */
#if defined(SINGLE_TEST) && (SINGLE_TEST == 1)
//dotest (0, 9, 1, -1, a+5, b+5, -1);
//return 0;
f (0, 9, 1, 5, a+5, b+5, -1);
return 0;
#endif
/* diff is passed as the initial value of j (beg2), c as the bound that
   j is compared against.  */
for (diff = -5; diff <= 5; diff++)
{
for (c = -1; c <= 10; c++)
{
#ifdef SINGLE_TEST
int s = f (0, 9, 1, c, a+5, b+5, diff);
//int s = f2 (a+0, a+9, 1, a+c, a+5, b+5, a+diff);
printf ("%d ", s);
#else
if (TRACE > 0)
printf ("check %d %d\n", c, diff);
check = check * 51 + dotest (0, 9, 1, c, a+5, b+5, diff);
#endif
}
//printf ("\n");
}
//printf ("%u\n", check);
/* Expected checksum over all (diff, c) combinations.
   NOTE(review): when SINGLE_TEST is defined to a value other than 1,
   check is never updated in the loop above, so this comparison fails
   and abort is called.  */
if (check != 3213344948)
abort ();
return 0;
}
/* All 16 loops in dotest should be split. */
/* { dg-final { scan-tree-dump-times "Loop split" 16 "lsplit" } } */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats" } */ /* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats" } */
/* { dg-require-visibility "" } */ /* { dg-require-visibility "" } */
int b[256] = {0}, y; int b[256] = {0}, y;
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-ifcvt-stats" } */ /* { dg-options "-Ofast -fno-split-loops -fdump-tree-ifcvt-stats" } */
/* { dg-require-visibility "" } */ /* { dg-require-visibility "" } */
extern int b[256], y; extern int b[256], y;
......
...@@ -185,6 +185,7 @@ DEFTIMEVAR (TV_LIM , "tree loop invariant motion") ...@@ -185,6 +185,7 @@ DEFTIMEVAR (TV_LIM , "tree loop invariant motion")
DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv") DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv")
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop") DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
......
...@@ -367,6 +367,7 @@ extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt); ...@@ -367,6 +367,7 @@ extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt); extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt); extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
......
...@@ -24,6 +24,8 @@ typedef void (*transform_callback)(struct loop *, void *); ...@@ -24,6 +24,8 @@ typedef void (*transform_callback)(struct loop *, void *);
extern void create_iv (tree, tree, tree, struct loop *, gimple_stmt_iterator *, extern void create_iv (tree, tree, tree, struct loop *, gimple_stmt_iterator *,
bool, tree *, tree *); bool, tree *, tree *);
extern void rewrite_into_loop_closed_ssa_1 (bitmap, unsigned, int,
struct loop *);
extern void rewrite_into_loop_closed_ssa (bitmap, unsigned); extern void rewrite_into_loop_closed_ssa (bitmap, unsigned);
extern void rewrite_virtuals_into_loop_closed_ssa (struct loop *); extern void rewrite_virtuals_into_loop_closed_ssa (struct loop *);
extern void verify_loop_closed_ssa (bool); extern void verify_loop_closed_ssa (bool);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment