Commit 5806e062 by Jeff Law

re PR tree-optimization/81165 (Regression in GCC-8.0.0's optimizer)

	PR tree-optimization/81165
	* tree-ssa-threadupdate.c (uses_in_bb): New.
	(estimate_threading_killed_stmts): New.
	* tree-ssa-threadupdate.h (estimate_threading_killed_stmts): Prototype.
	* tree-ssa-threadedge.c
	(record_temporary_equivalences_from_stmts_at_dest): Expand limit
	when it's hit.

	PR tree-optimization/81165
	* gcc.dg/pr81165.c: New.

From-SVN: r255724
parent 38f4bea8
2017-12-12  Jeff Law  <law@redhat.com>

2017-12-15  Alexandre Oliva  <aoliva@redhat.com>

	PR tree-optimization/81165
	* tree-ssa-threadupdate.c (uses_in_bb): New.
	(estimate_threading_killed_stmts): New.
	* tree-ssa-threadupdate.h (estimate_threading_killed_stmts): Prototype.
	* tree-ssa-threadedge.c
	(record_temporary_equivalences_from_stmts_at_dest): Expand limit
	when it's hit.

2017-12-15  Jeff Law  <law@redhat.com>

	PR tree-optimization/83410
	* tree-ssa-threadupdate.c (thread_block_1): Avoid certain jump

2017-12-15  Alexandre Oliva  <aoliva@redhat.com>

	PR tree-optimization/81165
	* gcc.dg/pr81165.c: New.

2017-12-15  Jakub Jelinek  <jakub@redhat.com>

	PR c++/83205
	...

gcc/testsuite/gcc.dg/pr81165.c (new file)
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-not " \[/%\] " "optimized" } } */
/* Testcase submitted for PR81165, with its main function removed as
   it's turned into a compile test.  We want to make sure that all of
   the divide/remainder computations are removed by tree optimizers.

   We can figure out that we don't need to compute at runtime even the
   condition to enter the loop: the initial i==0 would have to be
   greater than the sum of two small unsigned values: 1U>>t1 is in the
   range 0..1, whereas the char value is bounded by the range 0..127,
   being 128 % a positive number (zero would invoke undefined
   behavior, so we can assume it doesn't happen).  (We know it's
   nonnegative because it's 10 times a number that has no more than
   the bits for 16, 8 and 1 set.)

   We don't realize that the loop is useless right away: jump
   threading helps remove some of the complexity, particularly of the
   computation within the loop: t1 is compared with 1, but it can
   never be 1.  (We could assume as much, since its being 1 would
   divide by zero, but we don't.)

   If we don't enter the conditional block, t1 remains at 2; if we do,
   it's set to either -1 or 0.  If we jump thread at the end of the
   conditional block, we can figure out that the ranges exclude 1, and
   the loop body is completely optimized out.  However, we used to
   fail to consider the block for jump threading due to the amount of
   computation in it, without realizing most of it would die as a
   consequence of the threading.

   We now take the dying code into account when deciding whether or
   not to try jump threading.  That might enable us to optimize the
   function into { if (x2 != 0 || (x1 & 1) == 0) abort (); }.  At the
   time of this writing, with the patch, we get close, but the test on
   x2 only gets as far as ((1 >> x2) == 0).  Without the patch, some
   of the loop remains.  */
short x0 = 15;

void func (){
  volatile int x1 = 1U;
  volatile char x2 = 0;
  char t0 = 0;
  unsigned long t1 = 2LU;
  int i = 0;

  if(1>>x2) {
    t0 = -1;
    t1 = (1&(short)(x1^8U))-1;
  }

  while(i > (int)((1U>>t1)+(char)(128%(10*(25LU&(29%x0)))))) {
    i += (int)(12L/(1!=(int)t1));
  }

  if (t0 != -1) __builtin_abort();
  if (t1 != 0L) __builtin_abort();
}
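As a quick sanity check of the arithmetic in the comment above, the following standalone sketch (written for this note only; it is not part of the testcase or of the patch) evaluates the same expressions with the initial values of the volatiles, showing that the loop-entry condition compares 0 against 49 and is therefore never true, and that t0 and t1 end up at -1 and 0.

/* Standalone illustration only: mirrors the constant folding a compiler
   can do on the testcase above.  */
#include <assert.h>

int
main (void)
{
  short x0 = 15;
  int x1 = 1U;   /* initial value of the volatile int  */
  char x2 = 0;   /* initial value of the volatile char */

  /* 29 % 15 == 14, 25 & 14 == 8, 10 * 8 == 80, 128 % 80 == 48,
     which is indeed within the 0..127 range claimed above.  */
  assert ((char) (128 % (10 * (25LU & (29 % x0)))) == 48);

  /* 1 >> x2 is nonzero, so the conditional block is entered and
     t1 = (1 & (1 ^ 8)) - 1 == 0, t0 == -1.  */
  assert (1 >> x2);
  unsigned long t1 = (1 & (short) (x1 ^ 8U)) - 1;
  assert (t1 == 0);

  /* Loop entry would need i == 0 to exceed (1U >> 0) + 48 == 49,
     so the loop body and its divisions are dead code.  */
  assert (!(0 > (int) ((1U >> t1) + 48)));
  return 0;
}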
gcc/tree-ssa-threadedge.c
@@ -244,7 +244,22 @@ record_temporary_equivalences_from_stmts_at_dest (edge e,
 	 expansion, then do not thread through this block.  */
       stmt_count++;
       if (stmt_count > max_stmt_count)
-	return NULL;
+	{
+	  /* If any of the stmts in the PATH's dests are going to be
+	     killed due to threading, grow the max count
+	     accordingly.  */
+	  if (max_stmt_count
+	      == PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS))
+	    {
+	      max_stmt_count += estimate_threading_killed_stmts (e->dest);
+	      if (dump_file)
+		fprintf (dump_file, "threading bb %i up to %i stmts\n",
+			 e->dest->index, max_stmt_count);
+	    }
+	  /* If we're still past the limit, we're done.  */
+	  if (stmt_count > max_stmt_count)
+	    return NULL;
+	}
 
       /* These are temporary ranges, do not reflect them back into
 	 the global range data.  */
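The effect of the hunk above is easiest to see with concrete numbers. The sketch below uses hypothetical names and values (it is not GCC code) and only captures the decision the hunk implements: the per-block statement limit, normally taken from PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS, is widened at most once by the number of statements the threading is expected to kill, and the path is abandoned only if the count is still over the widened limit.

/* Hypothetical numbers only: a one-shot model of the new limit check.  */
#include <assert.h>

static int
keep_threading (int stmt_count, int base_limit, int estimated_killed)
{
  int max_stmt_count = base_limit;

  if (stmt_count > max_stmt_count)
    {
      /* Widen the limit once, by the statements expected to die.  */
      max_stmt_count += estimated_killed;
      /* Still past the limit: give up (the real code returns NULL).  */
      if (stmt_count > max_stmt_count)
	return 0;
    }
  return 1;
}

int
main (void)
{
  /* With a base limit of 15 (hypothetical), a block of 20 statements
     is rejected unless at least 5 of them are expected to be killed.  */
  assert (!keep_threading (20, 15, 4));
  assert (keep_threading (20, 15, 5));
  return 0;
}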
gcc/tree-ssa-threadupdate.c
@@ -2431,3 +2431,139 @@ register_jump_thread (vec<jump_thread_edge *> *path)
  paths.safe_push (path);
}

/* Return how many uses of T there are within BB, as long as there
   aren't any uses outside BB.  If there are any uses outside BB,
   return -1 if there's at most one use within BB, or -2 if there is
   more than one use within BB.  */

static int
uses_in_bb (tree t, basic_block bb)
{
  int uses = 0;
  bool outside_bb = false;

  imm_use_iterator iter;
  use_operand_p use_p;
  FOR_EACH_IMM_USE_FAST (use_p, iter, t)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
	continue;

      if (gimple_bb (USE_STMT (use_p)) != bb)
	outside_bb = true;
      else
	uses++;

      if (outside_bb && uses > 1)
	return -2;
    }

  if (outside_bb)
    return -1;

  return uses;
}
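To illustrate the return convention documented above, here is a toy model of the same logic (plain arrays stand in for GIMPLE immediate uses; the names are made up for this note), showing the three possible outcomes.

/* Toy model of uses_in_bb's contract: given the block in which each use of
   a value appears, return the number of uses in BB if all uses are in BB,
   -1 if there are uses elsewhere and at most one in BB, and -2 if there are
   uses elsewhere and more than one in BB.  */
#include <assert.h>

static int
toy_uses_in_bb (const int *use_bbs, int n_uses, int bb)
{
  int uses = 0;
  int outside_bb = 0;

  for (int i = 0; i < n_uses; i++)
    {
      if (use_bbs[i] != bb)
	outside_bb = 1;
      else
	uses++;

      if (outside_bb && uses > 1)
	return -2;
    }

  if (outside_bb)
    return -1;
  return uses;
}

int
main (void)
{
  int all_in_3[] = { 3, 3, 3 };
  int one_in_3[] = { 3, 7 };
  int two_in_3[] = { 3, 3, 7 };
  assert (toy_uses_in_bb (all_in_3, 3, 3) == 3);
  assert (toy_uses_in_bb (one_in_3, 2, 3) == -1);
  assert (toy_uses_in_bb (two_in_3, 3, 3) == -2);
  return 0;
}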
/* Starting from the final control flow stmt in BB, assuming it will
   be removed, follow uses in to-be-removed stmts back to their defs
   and count how many defs are to become dead and be removed as
   well.  */

unsigned int
estimate_threading_killed_stmts (basic_block bb)
{
  int killed_stmts = 0;
  hash_map<tree, int> ssa_remaining_uses;
  auto_vec<gimple *, 4> dead_worklist;

  /* If the block has only two predecessors, threading will turn phi
     dsts into either src, so count them as dead stmts.  */
  bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;

  if (drop_all_phis)
    for (gphi_iterator gsi = gsi_start_phis (bb);
	 !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gphi *phi = gsi.phi ();
	tree dst = gimple_phi_result (phi);

	/* We don't count virtual PHIs as stmts in
	   record_temporary_equivalences_from_phis.  */
	if (virtual_operand_p (dst))
	  continue;

	killed_stmts++;
      }

  if (gsi_end_p (gsi_last_bb (bb)))
    return killed_stmts;

  gimple *stmt = gsi_stmt (gsi_last_bb (bb));
  if (gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_GOTO
      && gimple_code (stmt) != GIMPLE_SWITCH)
    return killed_stmts;

  /* The control statement is always dead.  */
  killed_stmts++;
  dead_worklist.quick_push (stmt);
  while (!dead_worklist.is_empty ())
    {
      stmt = dead_worklist.pop ();

      ssa_op_iter iter;
      use_operand_p use_p;
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  tree t = USE_FROM_PTR (use_p);
	  gimple *def = SSA_NAME_DEF_STMT (t);

	  if (gimple_bb (def) == bb
	      && (gimple_code (def) != GIMPLE_PHI
		  || !drop_all_phis)
	      && !gimple_has_side_effects (def))
	    {
	      int *usesp = ssa_remaining_uses.get (t);
	      int uses;

	      if (usesp)
		uses = *usesp;
	      else
		uses = uses_in_bb (t, bb);

	      gcc_assert (uses);

	      /* Don't bother recording the expected use count if we
		 won't find any further uses within BB.  */
	      if (!usesp && (uses < -1 || uses > 1))
		{
		  usesp = &ssa_remaining_uses.get_or_insert (t);
		  *usesp = uses;
		}

	      if (uses < 0)
		continue;

	      --uses;
	      if (usesp)
		*usesp = uses;

	      if (!uses)
		{
		  killed_stmts++;
		  if (usesp)
		    ssa_remaining_uses.remove (t);
		  if (gimple_code (def) != GIMPLE_PHI)
		    dead_worklist.safe_push (def);
		}
	    }
	}
    }

  if (dump_file)
    fprintf (dump_file, "threading bb %i kills %i stmts\n",
	     bb->index, killed_stmts);

  return killed_stmts;
}
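The worklist logic above is easier to follow on a toy example. The sketch below is a made-up, array-based stand-in for GIMPLE (none of it is GCC API); it applies the same idea to a three-statement block "a = ...; b = f(a); if (b > 10)": removing the conditional kills b's definition, which in turn kills a's, so three statements are estimated dead.

/* Toy statement: the id of the value it defines (0 if none), the ids of the
   values it uses, and whether its defined value is also used outside the
   block (in which case losing its in-block uses cannot make it dead).  */
#include <assert.h>
#include <string.h>

#define MAX_VALS 8

struct toy_stmt
{
  int def;
  int uses[2];
  int n_uses;
  int used_outside_bb;
};

/* Count how many of the N statements die once the last one (the control
   statement) is removed: each removed statement gives up its uses, and a
   definition whose remaining in-block use count drops to zero dies too.  */
static int
toy_estimate_killed (const struct toy_stmt *stmts, int n)
{
  int remaining_uses[MAX_VALS];
  int worklist[MAX_VALS];
  int wl_top = 0;
  int killed = 1;		/* the control statement itself */

  memset (remaining_uses, 0, sizeof remaining_uses);
  for (int i = 0; i < n; i++)
    for (int j = 0; j < stmts[i].n_uses; j++)
      remaining_uses[stmts[i].uses[j]]++;

  worklist[wl_top++] = n - 1;
  while (wl_top > 0)
    {
      const struct toy_stmt *s = &stmts[worklist[--wl_top]];
      for (int j = 0; j < s->n_uses; j++)
	{
	  int val = s->uses[j];
	  /* Find the in-block statement defining VAL, if any.  */
	  for (int i = 0; i < n; i++)
	    if (stmts[i].def == val && !stmts[i].used_outside_bb
		&& --remaining_uses[val] == 0)
	      {
		killed++;
		worklist[wl_top++] = i;
	      }
	}
    }
  return killed;
}

int
main (void)
{
  /* Value 1: a = ...;  value 2: b = f(a);  control stmt: if (b > 10).  */
  const struct toy_stmt bb[] = {
    { 1, { 0, 0 }, 0, 0 },
    { 2, { 1, 0 }, 1, 0 },
    { 0, { 2, 0 }, 1, 0 },
  };
  assert (toy_estimate_killed (bb, 3) == 3);
  return 0;
}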
gcc/tree-ssa-threadupdate.h
@@ -47,6 +47,7 @@ extern void remove_jump_threads_including (edge);
extern void delete_jump_thread_path (vec <class jump_thread_edge *> *);
extern void remove_ctrl_stmt_and_useless_edges (basic_block, basic_block);
extern void free_dom_edge_info (edge);
extern unsigned int estimate_threading_killed_stmts (basic_block);
enum bb_dom_status
{