Commit d75dbccd by Daniel Berlin Committed by Daniel Berlin

re PR tree-optimization/27755 (PRE confused by control flow)

2006-11-14  Daniel Berlin  <dberlin@dberlin.org>

	Fix PR tree-optimization/27755

	* tree-ssa-pre.c: Update comments.
	(bb_bitmap_sets): Add pa_in and deferred members.
	(BB_DEFERRED): New macro.
	(maximal_set): New variable.
	(pre_stats): Add pa_insert member.
	(bitmap_set_and): Short circuit orig == dest.
	(bitmap_set_subtract_values): New function.
	(bitmap_set_contains_expr): Ditto.
	(translate_vuses_through_block): Add phiblock argument.
	(dependent_clean): New function.
	(compute_antic_aux): Update for maximal_set changes.
	(compute_partial_antic_aux): New function.
	(compute_antic): Handle partial anticipation.
	(do_partial_partial_insertion): New function.
	(insert_aux): Handle partial anticipation.
	(add_to_sets): Add to maximal set.
	(compute_avail): Ditto.
	(init_pre): Initialize maximal_set.
	(execute_pre): Do partial anticipation if -O3+.

From-SVN: r118821
parent 17339e88
2006-11-14 Daniel Berlin <dberlin@dberlin.org>
Fix PR tree-optimization/27755
* tree-ssa-pre.c: Update comments.
(bb_bitmap_sets): Add pa_in and deferred member.
(BB_DEFERRED): New macro.
(maximal_set): New variable.
(pre_stats): Add pa_insert member.
(bitmap_set_and): Short circuit orig == dest.
(bitmap_set_subtract_values): New function.
(bitmap_set_contains_expr): Ditto.
(translate_vuses_through_block): Add phiblock argument.
(dependent_clean): New function.
(compute_antic_aux): Update for maximal_set changes.
(compute_partial_antic_aux): New function.
(compute_antic): Handle partial anticipation.
(do_partial_partial_insertion): New function.
(insert_aux): Handle partial anticipation.
(add_to_sets): Add to maximal set.
(compute_avail): Ditto.
(init_pre): Initialize maximal_set.
(execute_pre): Do partial anticipation if -O3+.
2006-11-14 Paolo Bonzini <bonzini@gnu.org> 2006-11-14 Paolo Bonzini <bonzini@gnu.org>
PR rtl-optimization/29798 PR rtl-optimization/29798
......
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-pre-stats -std=c99" } */
/* PR tree-optimization/27755: the load *x is invariant in the outer
   do-while, but the empty inner for-loop adds control flow that used
   to confuse PRE.  The dg-final scan above this function's file
   checks that PRE still eliminates the redundant load ("Eliminated: 1"),
   so the exact statement structure here must be preserved.  */
int foo(int k, int *x)
{
int j=0;
int res = 0;
/* We should pull res = *x all the way out of the do-while */
do {
/* Empty loop: pure control flow for PRE to see through.  */
for (int n=0;n<3;++n);
res = *x;
} while (++j<k);
return res;
}
/* { dg-final { scan-tree-dump-times "Eliminated: 1" 1 "pre"} } */
/* { dg-final { cleanup-tree-dump "pre" } } */
...@@ -80,7 +80,7 @@ Boston, MA 02110-1301, USA. */ ...@@ -80,7 +80,7 @@ Boston, MA 02110-1301, USA. */
Next, we generate the ANTIC sets. These sets represent the Next, we generate the ANTIC sets. These sets represent the
anticipatable expressions. ANTIC is a backwards dataflow anticipatable expressions. ANTIC is a backwards dataflow
problem.An expression is anticipatable in a given block if it could problem. An expression is anticipatable in a given block if it could
be generated in that block. This means that if we had to perform be generated in that block. This means that if we had to perform
an insertion in that block, of the value of that expression, we an insertion in that block, of the value of that expression, we
could. Calculating the ANTIC sets requires phi translation of could. Calculating the ANTIC sets requires phi translation of
...@@ -104,7 +104,13 @@ Boston, MA 02110-1301, USA. */ ...@@ -104,7 +104,13 @@ Boston, MA 02110-1301, USA. */
In order to make it fully redundant, we insert the expression into In order to make it fully redundant, we insert the expression into
the predecessors where it is not available, but is ANTIC. the predecessors where it is not available, but is ANTIC.
insert/insert_aux performs this insertion.
For the partial anticipation case, we only perform insertion if it
is partially anticipated in some block, and fully available in all
of the predecessors.
insert/insert_aux/do_regular_insertion/do_partial_partial_insertion
performs these steps.
Fourth, we eliminate fully redundant expressions. Fourth, we eliminate fully redundant expressions.
This is a simple statement walk that replaces redundant This is a simple statement walk that replaces redundant
...@@ -289,6 +295,10 @@ typedef struct bb_bitmap_sets ...@@ -289,6 +295,10 @@ typedef struct bb_bitmap_sets
in a given basic block. */ in a given basic block. */
bitmap_set_t antic_in; bitmap_set_t antic_in;
/* The PA_IN set, which represents which values are
partially anticipatable in a given basic block. */
bitmap_set_t pa_in;
/* The NEW_SETS set, which is used during insertion to augment the /* The NEW_SETS set, which is used during insertion to augment the
AVAIL_OUT set of blocks with the new insertions performed during AVAIL_OUT set of blocks with the new insertions performed during
the current iteration. */ the current iteration. */
...@@ -306,22 +316,32 @@ typedef struct bb_bitmap_sets ...@@ -306,22 +316,32 @@ typedef struct bb_bitmap_sets
the block, regardless of RVUSE_KILL. */ the block, regardless of RVUSE_KILL. */
bitmap_set_t antic_safe_loads; bitmap_set_t antic_safe_loads;
/* True if we have visited this block during antic calculation. */ /* True if we have visited this block during ANTIC calculation. */
unsigned int visited:1; unsigned int visited:1;
} *bb_bitmap_sets_t;
/* True we have deferred processing this block during ANTIC
#define EXP_GEN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->exp_gen calculation until its successor is processed. */
#define PHI_GEN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->phi_gen unsigned int deferred : 1;
#define TMP_GEN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->tmp_gen } *bb_value_sets_t;
#define AVAIL_OUT(BB) ((bb_bitmap_sets_t) ((BB)->aux))->avail_out
#define ANTIC_IN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->antic_in #define EXP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->exp_gen
#define RVUSE_IN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_in #define PHI_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->phi_gen
#define RVUSE_GEN(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_gen #define TMP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->tmp_gen
#define RVUSE_KILL(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_kill #define AVAIL_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->avail_out
#define RVUSE_OUT(BB) ((bb_bitmap_sets_t) ((BB)->aux))->rvuse_out #define ANTIC_IN(BB) ((bb_value_sets_t) ((BB)->aux))->antic_in
#define NEW_SETS(BB) ((bb_bitmap_sets_t) ((BB)->aux))->new_sets #define PA_IN(BB) ((bb_value_sets_t) ((BB)->aux))->pa_in
#define ANTIC_SAFE_LOADS(BB) ((bb_bitmap_sets_t) ((BB)->aux))->antic_safe_loads #define RVUSE_IN(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_in
#define BB_VISITED(BB) ((bb_bitmap_sets_t) ((BB)->aux))->visited #define RVUSE_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_gen
#define RVUSE_KILL(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_kill
#define RVUSE_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->rvuse_out
#define NEW_SETS(BB) ((bb_value_sets_t) ((BB)->aux))->new_sets
#define ANTIC_SAFE_LOADS(BB) ((bb_value_sets_t) ((BB)->aux))->antic_safe_loads
#define BB_VISITED(BB) ((bb_value_sets_t) ((BB)->aux))->visited
#define BB_DEFERRED(BB) ((bb_value_sets_t) ((BB)->aux))->deferred
/* Maximal set of values, used to initialize the ANTIC problem, which
is an intersection problem. */
static bitmap_set_t maximal_set;
/* Basic block list in postorder. */ /* Basic block list in postorder. */
static int *postorder; static int *postorder;
...@@ -336,6 +356,9 @@ static struct ...@@ -336,6 +356,9 @@ static struct
/* The number of new expressions/temporaries generated by PRE. */ /* The number of new expressions/temporaries generated by PRE. */
int insertions; int insertions;
/* The number of inserts found due to partial anticipation */
int pa_insert;
/* The number of new PHI nodes added by PRE. */ /* The number of new PHI nodes added by PRE. */
int phis; int phis;
...@@ -344,6 +367,7 @@ static struct ...@@ -344,6 +367,7 @@ static struct
} pre_stats; } pre_stats;
static bool do_partial_partial;
static tree bitmap_find_leader (bitmap_set_t, tree); static tree bitmap_find_leader (bitmap_set_t, tree);
static void bitmap_value_insert_into_set (bitmap_set_t, tree); static void bitmap_value_insert_into_set (bitmap_set_t, tree);
static void bitmap_value_replace_in_set (bitmap_set_t, tree); static void bitmap_value_replace_in_set (bitmap_set_t, tree);
...@@ -632,6 +656,9 @@ bitmap_set_and (bitmap_set_t dest, bitmap_set_t orig) ...@@ -632,6 +656,9 @@ bitmap_set_and (bitmap_set_t dest, bitmap_set_t orig)
{ {
bitmap_iterator bi; bitmap_iterator bi;
unsigned int i; unsigned int i;
if (dest != orig)
{
bitmap temp = BITMAP_ALLOC (&grand_bitmap_obstack); bitmap temp = BITMAP_ALLOC (&grand_bitmap_obstack);
bitmap_and_into (dest->values, orig->values); bitmap_and_into (dest->values, orig->values);
...@@ -645,6 +672,7 @@ bitmap_set_and (bitmap_set_t dest, bitmap_set_t orig) ...@@ -645,6 +672,7 @@ bitmap_set_and (bitmap_set_t dest, bitmap_set_t orig)
bitmap_clear_bit (dest->expressions, i); bitmap_clear_bit (dest->expressions, i);
} }
BITMAP_FREE (temp); BITMAP_FREE (temp);
}
} }
/* Subtract all values and expressions contained in ORIG from DEST. */ /* Subtract all values and expressions contained in ORIG from DEST. */
...@@ -669,6 +697,26 @@ bitmap_set_subtract (bitmap_set_t dest, bitmap_set_t orig) ...@@ -669,6 +697,26 @@ bitmap_set_subtract (bitmap_set_t dest, bitmap_set_t orig)
return result; return result;
} }
/* Subtract all the values in bitmap set B from bitmap set A: remove
   from A every expression whose value handle is contained in B.  */
static void
bitmap_set_subtract_values (bitmap_set_t a, bitmap_set_t b)
{
unsigned int i;
bitmap_iterator bi;
/* Walk a snapshot of A's expressions, because bitmap_remove_from_set
   mutates A while we iterate.  */
bitmap temp = BITMAP_ALLOC (&grand_bitmap_obstack);
bitmap_copy (temp, a->expressions);
EXECUTE_IF_SET_IN_BITMAP (temp, 0, i, bi)
{
tree expr = expression_for_id (i);
if (bitmap_set_contains_value (b, get_value_handle (expr)))
bitmap_remove_from_set (a, expr);
}
BITMAP_FREE (temp);
}
/* Return true if bitmapped set SET contains the value VAL. */ /* Return true if bitmapped set SET contains the value VAL. */
static bool static bool
...@@ -683,6 +731,12 @@ bitmap_set_contains_value (bitmap_set_t set, tree val) ...@@ -683,6 +731,12 @@ bitmap_set_contains_value (bitmap_set_t set, tree val)
return bitmap_bit_p (set->values, VALUE_HANDLE_ID (val)); return bitmap_bit_p (set->values, VALUE_HANDLE_ID (val));
} }
/* Return true if bitmapped set SET contains the expression EXPR
   itself (tested by expression id), as opposed to merely containing
   its value (see bitmap_set_contains_value).  */
static inline bool
bitmap_set_contains_expr (bitmap_set_t set, tree expr)
{
return bitmap_bit_p (set->expressions, get_expression_id (expr));
}
/* Replace an instance of value LOOKFOR with expression EXPR in SET. */ /* Replace an instance of value LOOKFOR with expression EXPR in SET. */
static void static void
...@@ -855,11 +909,14 @@ pool_copy_list (tree list) ...@@ -855,11 +909,14 @@ pool_copy_list (tree list)
return head; return head;
} }
/* Translate the vuses in the VUSES vector backwards through phi /* Translate the vuses in the VUSES vector backwards through phi nodes
nodes, so that they have the value they would have in BLOCK. */ in PHIBLOCK, so that they have the value they would have in
BLOCK. */
static VEC(tree, gc) * static VEC(tree, gc) *
translate_vuses_through_block (VEC (tree, gc) *vuses, basic_block block) translate_vuses_through_block (VEC (tree, gc) *vuses,
basic_block phiblock,
basic_block block)
{ {
tree oldvuse; tree oldvuse;
VEC(tree, gc) *result = NULL; VEC(tree, gc) *result = NULL;
...@@ -868,7 +925,8 @@ translate_vuses_through_block (VEC (tree, gc) *vuses, basic_block block) ...@@ -868,7 +925,8 @@ translate_vuses_through_block (VEC (tree, gc) *vuses, basic_block block)
for (i = 0; VEC_iterate (tree, vuses, i, oldvuse); i++) for (i = 0; VEC_iterate (tree, vuses, i, oldvuse); i++)
{ {
tree phi = SSA_NAME_DEF_STMT (oldvuse); tree phi = SSA_NAME_DEF_STMT (oldvuse);
if (TREE_CODE (phi) == PHI_NODE) if (TREE_CODE (phi) == PHI_NODE
&& bb_for_stmt (phi) == phiblock)
{ {
edge e = find_edge (block, bb_for_stmt (phi)); edge e = find_edge (block, bb_for_stmt (phi));
if (e) if (e)
...@@ -1047,7 +1105,7 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, ...@@ -1047,7 +1105,7 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2,
if (listchanged) if (listchanged)
vn_lookup_or_add (newarglist, NULL); vn_lookup_or_add (newarglist, NULL);
tvuses = translate_vuses_through_block (vuses, pred); tvuses = translate_vuses_through_block (vuses, phiblock, pred);
if (listchanged || (newop0 != oldop0) || (oldop2 != newop2) if (listchanged || (newop0 != oldop0) || (oldop2 != newop2)
|| vuses != tvuses) || vuses != tvuses)
...@@ -1073,7 +1131,8 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, ...@@ -1073,7 +1131,8 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2,
oldvuses = VALUE_HANDLE_VUSES (get_value_handle (expr)); oldvuses = VALUE_HANDLE_VUSES (get_value_handle (expr));
if (oldvuses) if (oldvuses)
newvuses = translate_vuses_through_block (oldvuses, pred); newvuses = translate_vuses_through_block (oldvuses, phiblock,
pred);
if (oldvuses != newvuses) if (oldvuses != newvuses)
vn_lookup_or_add_with_vuses (expr, newvuses); vn_lookup_or_add_with_vuses (expr, newvuses);
...@@ -1137,7 +1196,8 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, ...@@ -1137,7 +1196,8 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2,
oldvuses = VALUE_HANDLE_VUSES (get_value_handle (expr)); oldvuses = VALUE_HANDLE_VUSES (get_value_handle (expr));
if (oldvuses) if (oldvuses)
newvuses = translate_vuses_through_block (oldvuses, pred); newvuses = translate_vuses_through_block (oldvuses, phiblock,
pred);
if (newop0 != oldop0 || newvuses != oldvuses if (newop0 != oldop0 || newvuses != oldvuses
|| newop1 != oldop1 || newop1 != oldop1
...@@ -1258,9 +1318,13 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2, ...@@ -1258,9 +1318,13 @@ phi_translate (tree expr, bitmap_set_t set1, bitmap_set_t set2,
{ {
tree phi = NULL; tree phi = NULL;
edge e; edge e;
tree def_stmt;
gcc_assert (TREE_CODE (expr) == SSA_NAME); gcc_assert (TREE_CODE (expr) == SSA_NAME);
if (TREE_CODE (SSA_NAME_DEF_STMT (expr)) == PHI_NODE)
phi = SSA_NAME_DEF_STMT (expr); def_stmt = SSA_NAME_DEF_STMT (expr);
if (TREE_CODE (def_stmt) == PHI_NODE
&& bb_for_stmt (def_stmt) == phiblock)
phi = def_stmt;
else else
return expr; return expr;
...@@ -1498,7 +1562,10 @@ valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree expr, ...@@ -1498,7 +1562,10 @@ valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree expr,
return false; return false;
case tcc_exceptional: case tcc_exceptional:
return true; {
gcc_assert (TREE_CODE (expr) == SSA_NAME);
return bitmap_set_contains_expr (AVAIL_OUT (block), expr);
}
case tcc_declaration: case tcc_declaration:
return !vuses_dies_in_block_x (VALUE_HANDLE_VUSES (vh), block); return !vuses_dies_in_block_x (VALUE_HANDLE_VUSES (vh), block);
...@@ -1509,6 +1576,27 @@ valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree expr, ...@@ -1509,6 +1576,27 @@ valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree expr,
} }
} }
/* Clean the set of expressions that are no longer valid in SET1 or
SET2. This means expressions that are made up of values we have no
leaders for in SET1 or SET2. This version is used for partial
anticipation, which means it is not valid in either ANTIC_IN or
PA_IN. */
static void
dependent_clean (bitmap_set_t set1, bitmap_set_t set2, basic_block block)
{
/* Take a sorted snapshot of SET1 so removals below cannot disturb
   the iteration.  */
VEC (tree, heap) *exprs = sorted_array_from_bitmap_set (set1);
tree expr;
int i;
for (i = 0; VEC_iterate (tree, exprs, i, expr); i++)
{
/* Drop EXPR from SET1 when valid_in_sets finds no leader for its
   constituent values in either SET1 or SET2.  */
if (!valid_in_sets (set1, set2, expr, block))
bitmap_remove_from_set (set1, expr);
}
VEC_free (tree, heap, exprs);
}
/* Clean the set of expressions that are no longer valid in SET. This /* Clean the set of expressions that are no longer valid in SET. This
means expressions that are made up of values we have no leaders for means expressions that are made up of values we have no leaders for
in SET. */ in SET. */
...@@ -1556,6 +1644,7 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) ...@@ -1556,6 +1644,7 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
edge_iterator ei; edge_iterator ei;
old = ANTIC_OUT = S = NULL; old = ANTIC_OUT = S = NULL;
BB_VISITED (block) = 1;
/* If any edges from predecessors are abnormal, antic_in is empty, /* If any edges from predecessors are abnormal, antic_in is empty,
so do nothing. */ so do nothing. */
...@@ -1564,7 +1653,6 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) ...@@ -1564,7 +1653,6 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
old = ANTIC_IN (block); old = ANTIC_IN (block);
ANTIC_OUT = bitmap_set_new (); ANTIC_OUT = bitmap_set_new ();
BB_VISITED (block) = 1;
/* If the block has no successors, ANTIC_OUT is empty. */ /* If the block has no successors, ANTIC_OUT is empty. */
if (EDGE_COUNT (block->succs) == 0) if (EDGE_COUNT (block->succs) == 0)
...@@ -1574,9 +1662,38 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) ...@@ -1574,9 +1662,38 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
else if (single_succ_p (block)) else if (single_succ_p (block))
{ {
basic_block succ_bb = single_succ (block); basic_block succ_bb = single_succ (block);
/* We trade iterations of the dataflow equations for having to
phi translate the maximal set, which is incredibly slow
(since the maximal set often has 300+ members, even when you
have a small number of blocks).
Basically, we defer the computation of ANTIC for this block
until we have processed its successor, which will inevitably
have a *much* smaller set of values to phi translate once
clean has been run on it.
The cost of doing this is that we technically perform more
iterations, however, they are lower cost iterations.
Timings for PRE on tramp3d-v4:
without maximal set fix: 11 seconds
with maximal set fix/without deferring: 26 seconds
with maximal set fix/with deferring: 11 seconds
*/
if (!BB_VISITED (succ_bb))
{
changed = true;
SET_BIT (changed_blocks, block->index);
BB_VISITED (block) = 0;
BB_DEFERRED (block) = 1;
goto maybe_dump_sets;
}
else
phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb), phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb),
block, succ_bb); block, succ_bb);
} }
/* If we have multiple successors, we take the intersection of all of /* If we have multiple successors, we take the intersection of all of
them. */ them. */
else else
...@@ -1584,36 +1701,31 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) ...@@ -1584,36 +1701,31 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
VEC(basic_block, heap) * worklist; VEC(basic_block, heap) * worklist;
size_t i; size_t i;
basic_block bprime, first; basic_block bprime, first;
bool any_visited = false;
worklist = VEC_alloc (basic_block, heap, EDGE_COUNT (block->succs)); worklist = VEC_alloc (basic_block, heap, EDGE_COUNT (block->succs));
FOR_EACH_EDGE (e, ei, block->succs) FOR_EACH_EDGE (e, ei, block->succs)
{
any_visited |= BB_VISITED (e->dest);
VEC_quick_push (basic_block, worklist, e->dest); VEC_quick_push (basic_block, worklist, e->dest);
}
if (any_visited)
{
first = VEC_index (basic_block, worklist, 0); first = VEC_index (basic_block, worklist, 0);
if (!BB_VISITED (first))
bitmap_set_copy (ANTIC_OUT, maximal_set);
else
bitmap_set_copy (ANTIC_OUT, ANTIC_IN (first)); bitmap_set_copy (ANTIC_OUT, ANTIC_IN (first));
for (i = 1; VEC_iterate (basic_block, worklist, i, bprime); i++) for (i = 1; VEC_iterate (basic_block, worklist, i, bprime); i++)
{ {
if (!BB_VISITED (bprime)) if (!BB_VISITED (bprime))
continue; bitmap_set_and (ANTIC_OUT, maximal_set);
else
bitmap_set_and (ANTIC_OUT, ANTIC_IN (bprime)); bitmap_set_and (ANTIC_OUT, ANTIC_IN (bprime));
} }
VEC_free (basic_block, heap, worklist); VEC_free (basic_block, heap, worklist);
} }
}
/* Generate ANTIC_OUT - TMP_GEN. */ /* Generate ANTIC_OUT - TMP_GEN. */
S = bitmap_set_subtract (ANTIC_OUT, TMP_GEN (block)); S = bitmap_set_subtract (ANTIC_OUT, TMP_GEN (block));
/* Start ANTIC_IN with EXP_GEN - TMP_GEN */ /* Start ANTIC_IN with EXP_GEN - TMP_GEN. */
ANTIC_IN (block) = bitmap_set_subtract (EXP_GEN (block), ANTIC_IN (block) = bitmap_set_subtract (EXP_GEN (block),
TMP_GEN (block)); TMP_GEN (block));
...@@ -1624,7 +1736,9 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) ...@@ -1624,7 +1736,9 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
expression_for_id (bii)); expression_for_id (bii));
clean (ANTIC_IN (block), block); clean (ANTIC_IN (block), block);
if (!bitmap_set_equal (old, ANTIC_IN (block)))
/* !old->expressions can happen when we deferred a block. */
if (!old->expressions || !bitmap_set_equal (old, ANTIC_IN (block)))
{ {
changed = true; changed = true;
SET_BIT (changed_blocks, block->index); SET_BIT (changed_blocks, block->index);
...@@ -1637,17 +1751,27 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) ...@@ -1637,17 +1751,27 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
maybe_dump_sets: maybe_dump_sets:
if (dump_file && (dump_flags & TDF_DETAILS)) if (dump_file && (dump_flags & TDF_DETAILS))
{ {
if (!BB_DEFERRED (block) || BB_VISITED (block))
{
if (ANTIC_OUT) if (ANTIC_OUT)
print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index); print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index);
if (ANTIC_SAFE_LOADS (block)) if (ANTIC_SAFE_LOADS (block))
print_bitmap_set (dump_file, ANTIC_SAFE_LOADS (block), print_bitmap_set (dump_file, ANTIC_SAFE_LOADS (block),
"ANTIC_SAFE_LOADS", block->index); "ANTIC_SAFE_LOADS", block->index);
print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN", block->index); print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN",
block->index);
if (S) if (S)
print_bitmap_set (dump_file, S, "S", block->index); print_bitmap_set (dump_file, S, "S", block->index);
} }
else
{
fprintf (dump_file,
"Block %d was deferred for a future iteration.\n",
block->index);
}
}
if (old) if (old)
bitmap_set_free (old); bitmap_set_free (old);
if (S) if (S)
...@@ -1657,6 +1781,126 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) ...@@ -1657,6 +1781,126 @@ compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
return changed; return changed;
} }
/* Compute PARTIAL_ANTIC for BLOCK.
If succs(BLOCK) > 1 then
PA_OUT[BLOCK] = value wise union of PA_IN[b] + all ANTIC_IN not
in ANTIC_OUT for all succ(BLOCK)
else if succs(BLOCK) == 1 then
PA_OUT[BLOCK] = phi_translate (PA_IN[succ(BLOCK)])
PA_IN[BLOCK] = dependent_clean(PA_OUT[BLOCK] - TMP_GEN[BLOCK]
- ANTIC_IN[BLOCK])
Returns true if PA_IN changed (so the dataflow iteration must
revisit this block's predecessors).  */
static bool
compute_partial_antic_aux (basic_block block,
bool block_has_abnormal_pred_edge)
{
bool changed = false;
bitmap_set_t old_PA_IN;
bitmap_set_t PA_OUT;
edge e;
edge_iterator ei;
old_PA_IN = PA_OUT = NULL;
/* If any edges from predecessors are abnormal, PA_IN is empty,
so do nothing. */
if (block_has_abnormal_pred_edge)
goto maybe_dump_sets;
old_PA_IN = PA_IN (block);
PA_OUT = bitmap_set_new ();
/* If the block has no successors, PA_OUT is empty. */
if (EDGE_COUNT (block->succs) == 0)
;
/* If we have one successor, we could have some phi nodes to
translate through. Note that we can't phi translate across DFS
back edges in partial antic, because it uses a union operation
on the successors. For recurrences like IVs, we will end up generating a
new value in the set on each go around (i + 3 (VH.1) VH.1 + 1
(VH.2), VH.2 + 1 (VH.3), etc), forever. */
else if (single_succ_p (block))
{
basic_block succ = single_succ (block);
if (!(single_succ_edge (block)->flags & EDGE_DFS_BACK))
phi_translate_set (PA_OUT, PA_IN (succ), block, succ);
}
/* If we have multiple successors, we take the union of all of
them. */
else
{
VEC(basic_block, heap) * worklist;
size_t i;
basic_block bprime;
worklist = VEC_alloc (basic_block, heap, EDGE_COUNT (block->succs));
FOR_EACH_EDGE (e, ei, block->succs)
{
/* Back edges are excluded from the union for the same
   recurrence reason as the single-successor case above.  */
if (e->flags & EDGE_DFS_BACK)
continue;
VEC_quick_push (basic_block, worklist, e->dest);
}
if (VEC_length (basic_block, worklist) > 0)
{
for (i = 0; VEC_iterate (basic_block, worklist, i, bprime); i++)
{
/* NOTE(review): this inner `i` deliberately(?) shadows the
   outer size_t `i` of the worklist loop — confirm this is
   intended before renaming.  */
unsigned int i;
bitmap_iterator bi;
/* Union in both fully and partially anticipated values of
   each (non-back-edge) successor.  */
FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (bprime), i, bi)
bitmap_value_insert_into_set (PA_OUT,
expression_for_id (i));
FOR_EACH_EXPR_ID_IN_SET (PA_IN (bprime), i, bi)
bitmap_value_insert_into_set (PA_OUT,
expression_for_id (i));
}
}
VEC_free (basic_block, heap, worklist);
}
/* PA_IN starts with PA_OUT - TMP_GEN.
Then we subtract things from ANTIC_IN. */
PA_IN (block) = bitmap_set_subtract (PA_OUT, TMP_GEN (block));
/* For partial antic, we want to put back in the phi results, since
we will properly avoid making them partially antic over backedges. */
bitmap_ior_into (PA_IN (block)->values, PHI_GEN (block)->values);
bitmap_ior_into (PA_IN (block)->expressions, PHI_GEN (block)->expressions);
/* PA_IN[block] = PA_IN[block] - ANTIC_IN[block] */
bitmap_set_subtract_values (PA_IN (block), ANTIC_IN (block));
dependent_clean (PA_IN (block), ANTIC_IN (block), block);
if (!bitmap_set_equal (old_PA_IN, PA_IN (block)))
{
changed = true;
SET_BIT (changed_blocks, block->index);
/* Mark predecessors so the backwards dataflow revisits them.  */
FOR_EACH_EDGE (e, ei, block->preds)
SET_BIT (changed_blocks, e->src->index);
}
else
RESET_BIT (changed_blocks, block->index);
maybe_dump_sets:
if (dump_file && (dump_flags & TDF_DETAILS))
{
if (PA_OUT)
print_bitmap_set (dump_file, PA_OUT, "PA_OUT", block->index);
print_bitmap_set (dump_file, PA_IN (block), "PA_IN", block->index);
}
if (old_PA_IN)
bitmap_set_free (old_PA_IN);
if (PA_OUT)
bitmap_set_free (PA_OUT);
return changed;
}
/* Compute ANTIC and partial ANTIC sets. */ /* Compute ANTIC and partial ANTIC sets. */
static void static void
...@@ -1688,13 +1932,16 @@ compute_antic (void) ...@@ -1688,13 +1932,16 @@ compute_antic (void)
} }
BB_VISITED (block) = 0; BB_VISITED (block) = 0;
BB_DEFERRED (block) = 0;
/* While we are here, give empty ANTIC_IN sets to each block. */ /* While we are here, give empty ANTIC_IN sets to each block. */
ANTIC_IN (block) = bitmap_set_new (); ANTIC_IN (block) = bitmap_set_new ();
PA_IN (block) = bitmap_set_new ();
} }
/* At the exit block we anticipate nothing. */ /* At the exit block we anticipate nothing. */
ANTIC_IN (EXIT_BLOCK_PTR) = bitmap_set_new (); ANTIC_IN (EXIT_BLOCK_PTR) = bitmap_set_new ();
BB_VISITED (EXIT_BLOCK_PTR) = 1; BB_VISITED (EXIT_BLOCK_PTR) = 1;
PA_IN (EXIT_BLOCK_PTR) = bitmap_set_new ();
changed_blocks = sbitmap_alloc (last_basic_block + 1); changed_blocks = sbitmap_alloc (last_basic_block + 1);
sbitmap_ones (changed_blocks); sbitmap_ones (changed_blocks);
...@@ -1714,12 +1961,44 @@ compute_antic (void) ...@@ -1714,12 +1961,44 @@ compute_antic (void)
block->index)); block->index));
} }
} }
/* Theoretically possible, but *highly* unlikely. */
gcc_assert (num_iterations < 50);
} }
if (dump_file && (dump_flags & TDF_STATS)) if (dump_file && (dump_flags & TDF_STATS))
fprintf (dump_file, "compute_antic required %d iterations\n", fprintf (dump_file, "compute_antic required %d iterations\n",
num_iterations); num_iterations);
if (do_partial_partial)
{
sbitmap_ones (changed_blocks);
mark_dfs_back_edges ();
num_iterations = 0;
changed = true;
while (changed)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Starting iteration %d\n", num_iterations);
num_iterations++;
changed = false;
for (i = 0; i < last_basic_block - NUM_FIXED_BLOCKS; i++)
{
if (TEST_BIT (changed_blocks, postorder[i]))
{
basic_block block = BASIC_BLOCK (postorder[i]);
changed
|= compute_partial_antic_aux (block,
TEST_BIT (has_abnormal_preds,
block->index));
}
}
/* Theoretically possible, but *highly* unlikely. */
gcc_assert (num_iterations < 50);
}
if (dump_file && (dump_flags & TDF_STATS))
fprintf (dump_file, "compute_partial_antic required %d iterations\n",
num_iterations);
}
sbitmap_free (has_abnormal_preds); sbitmap_free (has_abnormal_preds);
sbitmap_free (changed_blocks); sbitmap_free (changed_blocks);
} }
...@@ -2558,7 +2837,7 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum, ...@@ -2558,7 +2837,7 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum,
3. Recursively call ourselves on the dominator children of BLOCK. 3. Recursively call ourselves on the dominator children of BLOCK.
Steps 1, 2a, and 3 are done by insert_aux. 2b, 2c and 2d are done by Steps 1, 2a, and 3 are done by insert_aux. 2b, 2c and 2d are done by
do_regular_insertion. do_regular_insertion and do_partial_partial_insertion.
*/ */
...@@ -2689,8 +2968,107 @@ do_regular_insertion (basic_block block, basic_block dom) ...@@ -2689,8 +2968,107 @@ do_regular_insertion (basic_block block, basic_block dom)
} }
/* Perform insertion of partially redundant expressions for block /* Perform insertion for partially anticipatable expressions. There
BLOCK. */ is only one case we will perform insertion for these. This case is
if the expression is partially anticipatable, and fully available.
In this case, we know that putting it earlier will enable us to
remove the later computation. */
/* Perform insertion for partially anticipatable expressions in BLOCK,
   whose immediate dominator is DOM.  We insert only when the value is
   partially anticipated here and fully available — i.e. it has a
   leader in AVAIL_OUT of *every* predecessor (BY_ALL below) — so the
   insertion is guaranteed to let a later computation be removed.
   Returns true if anything new was inserted.  */
static bool
do_partial_partial_insertion (basic_block block, basic_block dom)
{
bool new_stuff = false;
VEC (tree, heap) *exprs = sorted_array_from_bitmap_set (PA_IN (block));
tree expr;
int i;
for (i = 0; VEC_iterate (tree, exprs, i, expr); i++)
{
if (can_PRE_operation (expr) && !AGGREGATE_TYPE_P (TREE_TYPE (expr)))
{
/* AVAIL maps predecessor block index -> leader expression.  */
tree *avail;
tree val;
bool by_all = true;
bool cant_insert = false;
edge pred;
basic_block bprime;
tree eprime = NULL_TREE;
edge_iterator ei;
val = get_value_handle (expr);
/* Values already produced by a PHI in this block, or already
   available out of the dominator, need no insertion.  */
if (bitmap_set_contains_value (PHI_GEN (block), val))
continue;
if (bitmap_set_contains_value (AVAIL_OUT (dom), val))
continue;
avail = XCNEWVEC (tree, last_basic_block);
FOR_EACH_EDGE (pred, ei, block->preds)
{
tree vprime;
tree edoubleprime;
/* This can happen in the very weird case
that our fake infinite loop edges have caused a
critical edge to appear. */
if (EDGE_CRITICAL_P (pred))
{
cant_insert = true;
break;
}
bprime = pred->src;
/* Translate EXPR through the PHIs of this block to get its
   form in the predecessor.  */
eprime = phi_translate (expr, ANTIC_IN (block),
PA_IN (block),
bprime, block);
/* eprime will generally only be NULL if the
value of the expression, translated
through the PHI for this predecessor, is
undefined. If that is the case, we can't
make the expression fully redundant,
because its value is undefined along a
predecessor path. We can thus break out
early because it doesn't matter what the
rest of the results are. */
if (eprime == NULL)
{
cant_insert = true;
break;
}
eprime = fully_constant_expression (eprime);
vprime = get_value_handle (eprime);
gcc_assert (vprime);
edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime),
vprime);
/* Unlike regular insertion, one unavailable predecessor
   disqualifies the whole expression.  */
if (edoubleprime == NULL)
{
by_all = false;
break;
}
else
avail[bprime->index] = edoubleprime;
}
/* If we can insert it, it's not the same value
already existing along every predecessor, and
it's defined by some predecessor, it is
partially redundant. */
if (!cant_insert && by_all)
{
pre_stats.pa_insert++;
if (insert_into_preds_of_block (block, get_expression_id (expr),
avail))
new_stuff = true;
}
free (avail);
}
}
VEC_free (tree, heap, exprs);
return new_stuff;
}
static bool static bool
insert_aux (basic_block block) insert_aux (basic_block block)
...@@ -2723,6 +3101,8 @@ insert_aux (basic_block block) ...@@ -2723,6 +3101,8 @@ insert_aux (basic_block block)
if (!single_pred_p (block)) if (!single_pred_p (block))
{ {
new_stuff |= do_regular_insertion (block, dom); new_stuff |= do_regular_insertion (block, dom);
if (do_partial_partial)
new_stuff |= do_partial_partial_insertion (block, dom);
} }
} }
} }
...@@ -2797,6 +3177,11 @@ add_to_sets (tree var, tree expr, tree stmt, bitmap_set_t s1, ...@@ -2797,6 +3177,11 @@ add_to_sets (tree var, tree expr, tree stmt, bitmap_set_t s1,
if (s1) if (s1)
bitmap_insert_into_set (s1, var); bitmap_insert_into_set (s1, var);
/* PHI nodes can't go in the maximal sets because they are not in
TMP_GEN, so it is possible to get into non-monotonic situations
during ANTIC calculation, because it will *add* bits. */
if (!in_fre && TREE_CODE (SSA_NAME_DEF_STMT (var)) != PHI_NODE)
bitmap_value_insert_into_set (maximal_set, var);
bitmap_value_insert_into_set (s2, var); bitmap_value_insert_into_set (s2, var);
} }
...@@ -3281,6 +3666,8 @@ compute_avail (void) ...@@ -3281,6 +3666,8 @@ compute_avail (void)
vn_lookup_or_add (def, NULL); vn_lookup_or_add (def, NULL);
bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR), def); bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR), def);
if (!in_fre)
bitmap_value_insert_into_set (maximal_set, def);
bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR), def); bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR), def);
} }
} }
...@@ -3295,6 +3682,8 @@ compute_avail (void) ...@@ -3295,6 +3682,8 @@ compute_avail (void)
vn_lookup_or_add (def, NULL); vn_lookup_or_add (def, NULL);
bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR), def); bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR), def);
if (!in_fre)
bitmap_value_insert_into_set (maximal_set, def);
bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR), def); bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR), def);
} }
} }
...@@ -3410,6 +3799,8 @@ compute_avail (void) ...@@ -3410,6 +3799,8 @@ compute_avail (void)
{ {
tree val = vn_lookup_or_add (newt, stmt); tree val = vn_lookup_or_add (newt, stmt);
vn_add (lhs, val); vn_add (lhs, val);
if (!in_fre)
bitmap_value_insert_into_set (maximal_set, newt);
bitmap_value_insert_into_set (EXP_GEN (block), newt); bitmap_value_insert_into_set (EXP_GEN (block), newt);
} }
bitmap_insert_into_set (TMP_GEN (block), lhs); bitmap_insert_into_set (TMP_GEN (block), lhs);
...@@ -3679,19 +4070,21 @@ init_pre (bool do_fre) ...@@ -3679,19 +4070,21 @@ init_pre (bool do_fre)
connect_infinite_loops_to_exit (); connect_infinite_loops_to_exit ();
memset (&pre_stats, 0, sizeof (pre_stats)); memset (&pre_stats, 0, sizeof (pre_stats));
postorder = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS); postorder = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
post_order_compute (postorder, false); post_order_compute (postorder, false);
FOR_ALL_BB (bb) FOR_ALL_BB (bb)
bb->aux = xcalloc (1, sizeof (struct bb_bitmap_sets)); bb->aux = xcalloc (1, sizeof (struct bb_bitmap_sets));
calculate_dominance_info (CDI_POST_DOMINATORS);
calculate_dominance_info (CDI_DOMINATORS);
bitmap_obstack_initialize (&grand_bitmap_obstack); bitmap_obstack_initialize (&grand_bitmap_obstack);
phi_translate_table = htab_create (5110, expr_pred_trans_hash, phi_translate_table = htab_create (5110, expr_pred_trans_hash,
expr_pred_trans_eq, free); expr_pred_trans_eq, free);
bitmap_set_pool = create_alloc_pool ("Bitmap sets", bitmap_set_pool = create_alloc_pool ("Bitmap sets",
sizeof (struct bitmap_set), 30); sizeof (struct bitmap_set), 30);
calculate_dominance_info (CDI_POST_DOMINATORS);
calculate_dominance_info (CDI_DOMINATORS);
binary_node_pool = create_alloc_pool ("Binary tree nodes", binary_node_pool = create_alloc_pool ("Binary tree nodes",
tree_code_size (PLUS_EXPR), 30); tree_code_size (PLUS_EXPR), 30);
unary_node_pool = create_alloc_pool ("Unary tree nodes", unary_node_pool = create_alloc_pool ("Unary tree nodes",
...@@ -3716,6 +4109,8 @@ init_pre (bool do_fre) ...@@ -3716,6 +4109,8 @@ init_pre (bool do_fre)
TMP_GEN (bb) = bitmap_set_new (); TMP_GEN (bb) = bitmap_set_new ();
AVAIL_OUT (bb) = bitmap_set_new (); AVAIL_OUT (bb) = bitmap_set_new ();
} }
maximal_set = in_fre ? NULL : bitmap_set_new ();
need_eh_cleanup = BITMAP_ALLOC (NULL); need_eh_cleanup = BITMAP_ALLOC (NULL);
} }
...@@ -3787,6 +4182,7 @@ static void ...@@ -3787,6 +4182,7 @@ static void
execute_pre (bool do_fre) execute_pre (bool do_fre)
{ {
do_partial_partial = optimize > 2;
init_pre (do_fre); init_pre (do_fre);
if (!do_fre) if (!do_fre)
...@@ -3829,6 +4225,7 @@ execute_pre (bool do_fre) ...@@ -3829,6 +4225,7 @@ execute_pre (bool do_fre)
if (dump_file && (dump_flags & TDF_STATS)) if (dump_file && (dump_flags & TDF_STATS))
{ {
fprintf (dump_file, "Insertions: %d\n", pre_stats.insertions); fprintf (dump_file, "Insertions: %d\n", pre_stats.insertions);
fprintf (dump_file, "PA inserted: %d\n", pre_stats.pa_insert);
fprintf (dump_file, "New PHIs: %d\n", pre_stats.phis); fprintf (dump_file, "New PHIs: %d\n", pre_stats.phis);
fprintf (dump_file, "Eliminated: %d\n", pre_stats.eliminations); fprintf (dump_file, "Eliminated: %d\n", pre_stats.eliminations);
fprintf (dump_file, "Constified: %d\n", pre_stats.constified); fprintf (dump_file, "Constified: %d\n", pre_stats.constified);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment