Commit 2b572b3c by Jeff Law Committed by Jeff Law

re PR tree-optimization/68398 (coremark regression due to r229685)

	PR tree-optimization/68398
	* params.def (PARAM_FSM_SCALE_PATH_STMTS): New parameter.
	(PARAM_FSM_SCALE_PATH_BLOCKS): Likewise.
	* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
	Only count PHIs in the last block in the path.  The others will
	const/copy propagate away.  Add heuristic to allow more irreducible
	subloops to be created when it is likely profitable to do so.

	* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
	Fix typo in comment.  Use gsi_after_labels and remove the GIMPLE_LABEL
	check from within the loop.  Use gsi_next_nondebug rather than gsi_next.

	PR tree-optimization/68398
	* gcc.dg/tree-ssa/pr66752-3.c: Update expected output.
	* gcc.dg/tree-ssa/ssa-dom-thread-2c.c: Add extra statements on thread
	path to avoid new heuristic allowing more irreducible regions.
	* gcc.dg/tree-ssa/ssa-dom-thread-2d.c: Likewise.
	* gcc.dg/tree-ssa/vrp46.c: Likewise.
	* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Update expected output.
	* gcc.dg/tree-ssa/ssa-dom-thread-2g.c: New test.
	* gcc.dg/tree-ssa/ssa-dom-thread-2h.c: Likewise.

From-SVN: r232897
parent fa74a4bc
2016-01-27 Jeff Law <law@redhat.com>
PR tree-optimization/68398
PR tree-optimization/69196
* params.def (PARAM_FSM_SCALE_PATH_STMTS): New parameter.
(PARAM_FSM_SCALE_PATH_BLOCKS): Likewise.
* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Only count PHIs in the last block in the path. The others will
const/copy propagate away. Add heuristic to allow more irreducible
subloops to be created when it is likely profitable to do so.
* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Fix typo in comment. Use gsi_after_labels and remove the GIMPLE_LABEL
check from within the loop. Use gsi_next_nondebug rather than gsi_next.
2016-01-27 Jakub Jelinek <jakub@redhat.com> 2016-01-27 Jakub Jelinek <jakub@redhat.com>
PR lto/69254 PR lto/69254
......
...@@ -1145,6 +1145,16 @@ DEFPARAM (PARAM_CHKP_MAX_CTOR_SIZE, ...@@ -1145,6 +1145,16 @@ DEFPARAM (PARAM_CHKP_MAX_CTOR_SIZE,
"constructor generated by Pointer Bounds Checker.", "constructor generated by Pointer Bounds Checker.",
5000, 100, 0) 5000, 100, 0)
DEFPARAM (PARAM_FSM_SCALE_PATH_STMTS,
"fsm-scale-path-stmts",
"Scale factor to apply to the number of statements in a threading path when comparing to the number of (scaled) blocks.",
2, 1, 10)
DEFPARAM (PARAM_FSM_SCALE_PATH_BLOCKS,
"fsm-scale-path-blocks",
"Scale factor to apply to the number of blocks in a threading path when comparing to the number of (scaled) statements.",
3, 1, 10)
DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS, DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS,
"max-fsm-thread-path-insns", "max-fsm-thread-path-insns",
"Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path.", "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path.",
......
2016-01-25 Jeff Law <law@redhat.com>
PR tree-optimization/68398
PR tree-optimization/69196
* gcc.dg/tree-ssa/pr66752-3.c: Update expected output.
* gcc.dg/tree-ssa/ssa-dom-thread-2c.c: Add extra statements on thread
path to avoid new heuristic allowing more irreducible regions.
* gcc.dg/tree-ssa/ssa-dom-thread-2d.c: Likewise.
* gcc.dg/tree-ssa/vrp46.c: Likewise.
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Update expected output.
* gcc.dg/tree-ssa/ssa-dom-thread-2g.c: New test.
* gcc.dg/tree-ssa/ssa-dom-thread-2h.c: Likewise.
2016-01-27 Marek Polacek <polacek@redhat.com> 2016-01-27 Marek Polacek <polacek@redhat.com>
PR c/68062 PR c/68062
......
...@@ -32,10 +32,9 @@ foo (int N, int c, int b, int *a) ...@@ -32,10 +32,9 @@ foo (int N, int c, int b, int *a)
pt--; pt--;
} }
/* There are 3 FSM jump threading opportunities, one of which will /* There are 3 FSM jump threading opportunities, all of which will be
get filtered. */ realized, which will eliminate testing of FLAG, completely. */
/* { dg-final { scan-tree-dump-times "Registering FSM" 2 "vrp1"} } */ /* { dg-final { scan-tree-dump-times "Registering FSM" 3 "vrp1"} } */
/* { dg-final { scan-tree-dump-times "FSM would create irreducible loop" 1 "vrp1"} } */
/* There should be no assignments or references to FLAG. */ /* There should be no assignments or references to FLAG. */
/* { dg-final { scan-tree-dump-not "flag" "optimized"} } */ /* { dg-final { scan-tree-dump-not "flag" "optimized"} } */
...@@ -15,6 +15,9 @@ void dont_thread_1 (void) ...@@ -15,6 +15,9 @@ void dont_thread_1 (void)
do do
{ {
bla ();
bla ();
bla ();
if (first) if (first)
foo (); foo ();
else else
......
...@@ -13,6 +13,9 @@ void dont_thread_2 (int first) ...@@ -13,6 +13,9 @@ void dont_thread_2 (int first)
do do
{ {
bla ();
bla ();
bla ();
if (first) if (first)
foo (); foo ();
else else
......
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
void foo();
void bla();
void bar();
/* Testcase body: a do-while loop whose first iteration takes a different
   branch than every later iteration.  FIRST starts at 1 and is cleared
   inside the loop, so foo () runs once and bar () runs on all subsequent
   iterations; bla () runs on every iteration.
   NOTE(review): despite the name, the dg-final scans that follow this
   function appear to expect jumps to be threaded here -- confirm.  */
void dont_thread_1 (void)
{
int i = 0;
int first = 1;
do
{
/* True only on the first trip through the loop.  */
if (first)
foo ();
else
bar ();
/* After this store the else arm above is taken on every later trip.  */
first = 0;
bla ();
/* Post-increment compare: the body executes for i = 0 .. 100.  */
} while (i++ < 100);
}
/* { dg-final { scan-tree-dump "Jumps threaded: 2" "vrp1"} } */
/* { dg-final { scan-tree-dump "Jumps threaded: 1" "dom2"} } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats" } */
void foo();
void bla();
void bar();
/* Avoid threading in the following case, to prevent creating subloops. */
/* Same loop shape as a constant-initialized variant would have, except
   that FIRST is a parameter: its value on the first iteration is not
   known inside this function.  It is cleared in the loop body, so every
   iteration after the first calls bar (); bla () runs on every
   iteration.  */
void dont_thread_2 (int first)
{
int i = 0;
do
{
/* Caller-supplied on the first trip, always zero afterwards.  */
if (first)
foo ();
else
bar ();
first = 0;
bla ();
/* Post-increment compare: the body executes for i = 0 .. 100.  */
} while (i++ < 100);
}
/* Peeling off the first iteration would make threading through
the loop latch safe, but we don't do that currently. */
/* { dg-final { scan-tree-dump "Jumps threaded: 1" "vrp1"} } */
/* { dg-final { scan-tree-dump "Jumps threaded: 1" "dom2"} } */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats -fdump-tree-dom3-stats" } */ /* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom2-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats" } */
/* { dg-final { scan-tree-dump "Jumps threaded: 7" "vrp1" } } */ /* { dg-final { scan-tree-dump "Jumps threaded: 19" "vrp1" } } */
/* { dg-final { scan-tree-dump "Jumps threaded: 12" "dom2" } } */ /* { dg-final { scan-tree-dump "Jumps threaded: 12" "dom2" } } */
/* { dg-final { scan-tree-dump "Jumps threaded: 3" "dom3" } } */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom3" } } */
/* { dg-final { scan-tree-dump-not "Jumps threaded" "vrp2" } } */
enum STATE { enum STATE {
S0=0, S0=0,
......
...@@ -12,6 +12,8 @@ func_18 ( int t ) ...@@ -12,6 +12,8 @@ func_18 ( int t )
for (0; 1; ++l_889) for (0; 1; ++l_889)
{ {
int t1 = 0; int t1 = 0;
func_98 (0);
func_98 (0);
if (func_81 (1)) if (func_81 (1))
{ {
int rhs = l_895; int rhs = l_895;
......
...@@ -266,7 +266,7 @@ fsm_find_control_statement_thread_paths (tree name, ...@@ -266,7 +266,7 @@ fsm_find_control_statement_thread_paths (tree name,
basic_block bb = (*path)[j]; basic_block bb = (*path)[j];
/* Remember, blocks in the path are stored in opposite order /* Remember, blocks in the path are stored in opposite order
in the PATH array. The last entry in the array reprensents in the PATH array. The last entry in the array represents
the block with an outgoing edge that we will redirect to the the block with an outgoing edge that we will redirect to the
jump threading path. Thus we don't care about that block's jump threading path. Thus we don't care about that block's
loop father, nor how many statements are in that block because loop father, nor how many statements are in that block because
...@@ -280,33 +280,19 @@ fsm_find_control_statement_thread_paths (tree name, ...@@ -280,33 +280,19 @@ fsm_find_control_statement_thread_paths (tree name,
break; break;
} }
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) for (gsi = gsi_after_labels (bb);
!gsi_end_p (gsi);
gsi_next_nondebug (&gsi))
{ {
gimple *stmt = gsi_stmt (gsi); gimple *stmt = gsi_stmt (gsi);
/* Do not count empty statements and labels. */ /* Do not count empty statements and labels. */
if (gimple_code (stmt) != GIMPLE_NOP if (gimple_code (stmt) != GIMPLE_NOP
&& gimple_code (stmt) != GIMPLE_LABEL
&& !(gimple_code (stmt) == GIMPLE_ASSIGN && !(gimple_code (stmt) == GIMPLE_ASSIGN
&& gimple_assign_rhs_code (stmt) == ASSERT_EXPR) && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
&& !is_gimple_debug (stmt)) && !is_gimple_debug (stmt))
++n_insns; ++n_insns;
} }
gphi_iterator gsip;
for (gsip = gsi_start_phis (bb);
!gsi_end_p (gsip);
gsi_next (&gsip))
{
gphi *phi = gsip.phi ();
tree dst = gimple_phi_result (phi);
/* We consider any non-virtual PHI as a statement since it
count result in a constant assignment or copy
operation. */
if (!virtual_operand_p (dst))
++n_insns;
}
/* We do not look at the block with the threaded branch /* We do not look at the block with the threaded branch
in this loop. So if any block with a last statement that in this loop. So if any block with a last statement that
is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
...@@ -360,6 +346,24 @@ fsm_find_control_statement_thread_paths (tree name, ...@@ -360,6 +346,24 @@ fsm_find_control_statement_thread_paths (tree name,
== DOMST_NONDOMINATING)) == DOMST_NONDOMINATING))
creates_irreducible_loop = true; creates_irreducible_loop = true;
/* PHIs in the final target and only the final target will need
to be duplicated. So only count those against the number
of statements. */
gphi_iterator gsip;
for (gsip = gsi_start_phis (taken_edge->dest);
!gsi_end_p (gsip);
gsi_next (&gsip))
{
gphi *phi = gsip.phi ();
tree dst = gimple_phi_result (phi);
/* We consider any non-virtual PHI as a statement since it
could result in a constant assignment or copy
operation. */
if (!virtual_operand_p (dst))
++n_insns;
}
if (path_crosses_loops) if (path_crosses_loops)
{ {
if (dump_file && (dump_flags & TDF_DETAILS)) if (dump_file && (dump_flags & TDF_DETAILS))
...@@ -379,10 +383,18 @@ fsm_find_control_statement_thread_paths (tree name, ...@@ -379,10 +383,18 @@ fsm_find_control_statement_thread_paths (tree name,
continue; continue;
} }
/* We avoid creating irreducible loops unless we thread through /* We avoid creating irreducible inner loops unless we thread through
a multiway branch, in which case we have deemed it worth losing other a multiway branch, in which case we have deemed it worth losing other
loop optimizations later. */ loop optimizations later.
if (!threaded_multiway_branch && creates_irreducible_loop)
We also consider it worth creating an irreducible inner loop if
the number of copied statements is low relative to the length of
the path -- in that case there's little the traditional loop optimizer
would have done anyway, so an irreducible loop is not so bad. */
if (!threaded_multiway_branch && creates_irreducible_loop
&& (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
> path_length * PARAM_VALUE (PARAM_FSM_SCALE_PATH_BLOCKS)))
{ {
if (dump_file && (dump_flags & TDF_DETAILS)) if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, fprintf (dump_file,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment