Commit 00952e97 authored by Paolo Bonzini, committed by Paolo Bonzini

re PR rtl-optimization/33928 (30% performance slowdown in floating-point code caused by r118475)

2009-05-08  Paolo Bonzini  <bonzini@gnu.org>

	PR rtl-optimization/33928
	PR 26854
	* fwprop.c (use_def_ref, get_def_for_use, bitmap_only_bit_between,
	process_uses, build_single_def_use_links): New.
	(update_df): Update use_def_ref.
	(forward_propagate_into): Use get_def_for_use instead of use-def
	chains.
	(fwprop_init): Call build_single_def_use_links and let it initialize
	dataflow.
	(fwprop_done): Free use_def_ref.
	(fwprop_addr): Eliminate duplicate call to df_set_flags.
	* df-problems.c (df_rd_simulate_artificial_defs_at_top, 
	df_rd_simulate_one_insn): New.
	(df_rd_bb_local_compute_process_def): Update head comment.
	(df_chain_create_bb): Use the new RD simulation functions.
	* df.h (df_rd_simulate_artificial_defs_at_top, 
	df_rd_simulate_one_insn): New.
	* opts.c (decode_options): Enable fwprop at -O1.
	* doc/invoke.texi (-fforward-propagate): Document this.

From-SVN: r147282
parent 2ca862e9
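At a high level, the patch drops fwprop's reliance on full DF_UD_CHAIN use-def chains and instead builds use_def_ref, a flat vector indexed by use ID that records the single reaching definition of a use, or NULL when the use has zero or several reaching definitions. The sketch below, condensed from the fwprop.c hunks further down, contrasts the old and new lookup in forward_propagate_into; it is an illustrative aside, not an additional part of the diff.

  /* Before: walk the use-def chain built by the DF_UD_CHAIN problem and
     give up unless it has exactly one element.  */
  struct df_link *defs = DF_REF_CHAIN (use);
  if (!defs || defs->next)
    return;
  def = defs->ref;

  /* After: a single lookup in the use_def_ref vector filled in by
     build_single_def_use_links.  */
  def = get_def_for_use (use);
  if (!def)
    return;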
@@ -316,7 +316,61 @@ df_rd_alloc (bitmap all_blocks)
}
/* Process a list of DEFs for df_rd_bb_local_compute. */
/* Add the effect of the top artificial defs of BB to the reaching definitions
   bitmap LOCAL_RD.  */

void
df_rd_simulate_artificial_defs_at_top (basic_block bb, bitmap local_rd)
{
  int bb_index = bb->index;
  df_ref *def_rec;
  for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
    {
      df_ref def = *def_rec;
      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
        {
          unsigned int dregno = DF_REF_REGNO (def);
          if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
            bitmap_clear_range (local_rd,
                                DF_DEFS_BEGIN (dregno),
                                DF_DEFS_COUNT (dregno));
          bitmap_set_bit (local_rd, DF_REF_ID (def));
        }
    }
}

/* Add the effect of the defs of INSN to the reaching definitions bitmap
   LOCAL_RD.  */

void
df_rd_simulate_one_insn (basic_block bb ATTRIBUTE_UNUSED, rtx insn,
                         bitmap local_rd)
{
  unsigned uid = INSN_UID (insn);
  df_ref *def_rec;
  for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
    {
      df_ref def = *def_rec;
      unsigned int dregno = DF_REF_REGNO (def);
      if ((!(df->changeable_flags & DF_NO_HARD_REGS))
          || (dregno >= FIRST_PSEUDO_REGISTER))
        {
          if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
            bitmap_clear_range (local_rd,
                                DF_DEFS_BEGIN (dregno),
                                DF_DEFS_COUNT (dregno));
          if (!(DF_REF_FLAGS (def)
                & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
            bitmap_set_bit (local_rd, DF_REF_ID (def));
        }
    }
}
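The two functions above are intended to be used as a pair to step a reaching-definitions bitmap forward through a basic block, which is how the updated df_chain_create_bb below and the new build_single_def_use_links in fwprop.c use them. A minimal sketch of that calling pattern, assuming a hypothetical pass (example_scan_block and the elided "consult local_rd" step are placeholders, not part of the patch):

  static void
  example_scan_block (basic_block bb)
  {
    bitmap local_rd = BITMAP_ALLOC (NULL);
    rtx insn;

    /* Start from the RD solution at the block entry and apply the
       artificial defs that occur at the top of the block.  */
    bitmap_copy (local_rd, df_rd_get_bb_info (bb->index)->in);
    df_rd_simulate_artificial_defs_at_top (bb, local_rd);

    FOR_BB_INSNS (bb, insn)
      if (INSN_P (insn))
        {
          /* ... consult local_rd for the uses of insn here ...  */
          df_rd_simulate_one_insn (bb, insn, local_rd);
        }

    BITMAP_FREE (local_rd);
  }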
/* Process a list of DEFs for df_rd_bb_local_compute.  This is a bit
   more complicated than just simulating, because we must produce the
   gen and kill sets and hence deal with the two possible representations
   of kill sets.  */
static void
df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
@@ -2076,7 +2130,6 @@ df_chain_create_bb (unsigned int bb_index)
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
rtx insn;
bitmap cpy = BITMAP_ALLOC (NULL);
df_ref *def_rec;
bitmap_copy (cpy, bb_info->in);
bitmap_set_bit (df_chain->out_of_date_transfer_functions, bb_index);
@@ -2095,57 +2148,23 @@ df_chain_create_bb (unsigned int bb_index)
DF_REF_AT_TOP);
#endif
for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
{
df_ref def = *def_rec;
if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
{
unsigned int dregno = DF_REF_REGNO (def);
if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
bitmap_clear_range (cpy,
DF_DEFS_BEGIN (dregno),
DF_DEFS_COUNT (dregno));
bitmap_set_bit (cpy, DF_REF_ID (def));
}
}
df_rd_simulate_artificial_defs_at_top (bb, cpy);
/* Process the regular instructions next. */
FOR_BB_INSNS (bb, insn)
{
df_ref *def_rec;
unsigned int uid = INSN_UID (insn);
if (!INSN_P (insn))
continue;
/* Now scan the uses and link them up with the defs that remain
in the cpy vector. */
df_chain_create_bb_process_use (cpy, DF_INSN_UID_USES (uid), 0);
if (df->changeable_flags & DF_EQ_NOTES)
df_chain_create_bb_process_use (cpy, DF_INSN_UID_EQ_USES (uid), 0);
if (INSN_P (insn))
{
unsigned int uid = INSN_UID (insn);
/* First scan the uses and link them up with the defs that remain
in the cpy vector. */
df_chain_create_bb_process_use (cpy, DF_INSN_UID_USES (uid), 0);
if (df->changeable_flags & DF_EQ_NOTES)
df_chain_create_bb_process_use (cpy, DF_INSN_UID_EQ_USES (uid), 0);
/* Since we are going forwards, process the defs second. This
pass only changes the bits in cpy. */
for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
{
df_ref def = *def_rec;
unsigned int dregno = DF_REF_REGNO (def);
if ((!(df->changeable_flags & DF_NO_HARD_REGS))
|| (dregno >= FIRST_PSEUDO_REGISTER))
{
if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
bitmap_clear_range (cpy,
DF_DEFS_BEGIN (dregno),
DF_DEFS_COUNT (dregno));
if (!(DF_REF_FLAGS (def)
& (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
bitmap_set_bit (cpy, DF_REF_ID (def));
}
}
}
/* Since we are going forwards, process the defs second. */
df_rd_simulate_one_insn (bb, insn, cpy);
}
/* Create the chains for the artificial uses of the hard registers
at the end of the block. */
@@ -939,6 +939,8 @@ extern void df_grow_bb_info (struct dataflow *);
extern void df_chain_dump (struct df_link *, FILE *);
extern void df_print_bb_index (basic_block bb, FILE *file);
extern void df_rd_add_problem (void);
extern void df_rd_simulate_artificial_defs_at_top (basic_block, bitmap);
extern void df_rd_simulate_one_insn (basic_block, rtx, bitmap);
extern void df_lr_add_problem (void);
extern void df_lr_verify_transfer_functions (void);
extern void df_live_verify_transfer_functions (void);
@@ -5554,8 +5554,8 @@ instructions and checks if the result can be simplified. If loop unrolling
is active, two passes are performed and the second is scheduled after
loop unrolling.
This option is enabled by default at optimization levels @option{-O2},
@option{-O3}, @option{-Os}.
This option is enabled by default at optimization levels @option{-O},
@option{-O2}, @option{-O3}, @option{-Os}.
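As a rough source-level illustration (an assumption about typical behavior, not an example taken from the manual or from this commit), forward propagation targets cases where a register defined once by a simple expression, such as an address computation, can be absorbed directly by its users, for instance into their addressing modes:

  int
  sum_pair (int *p, int i)
  {
    int *q = p + i;      /* single definition of q ...                      */
    return q[0] + q[1];  /* ... which fwprop may fold into the two memory
                            references as *(p + i) and *(p + i + 1).  */
  }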
@item -fomit-frame-pointer
@opindex fomit-frame-pointer
@@ -105,6 +105,111 @@ along with GCC; see the file COPYING3. If not see
static int num_changes;

DEF_VEC_P(df_ref);
DEF_VEC_ALLOC_P(df_ref,heap);
VEC(df_ref,heap) *use_def_ref;

/* Return the only def in USE's use-def chain, or NULL if there is
   more than one def in the chain.  */

static inline df_ref
get_def_for_use (df_ref use)
{
  return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
}

/* Return the only bit between FIRST and LAST that is set in B,
   or -1 if there are zero or more than one such bits.  */

static inline int
bitmap_only_bit_between (const_bitmap b, unsigned first, unsigned last)
{
  bitmap_iterator bi;
  unsigned bit, bit2;

  if (last < first)
    return -1;

  bmp_iter_set_init (&bi, b, first, &bit);
  if (bmp_iter_set (&bi, &bit) && bit <= last)
    {
      bit2 = bit;
      bmp_iter_next (&bi, &bit2);
      if (!bmp_iter_set (&bi, &bit2) || bit2 > last)
        return bit;
    }

  return -1;
}
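For intuition, here is a self-contained analogue of the same test on a plain 64-bit mask; only_bit_between_u64 is a hypothetical standalone helper, not part of GCC, whereas the real function above works on GCC's bitmap type through its iterator API. For example, with mask 0x120 (bits 5 and 8 set), the range [4, 7] yields 5, while [4, 9] yields -1 because two set bits fall inside it.

  #include <stdint.h>

  /* Illustrative only: return the index of the single bit of MASK that is
     set within [FIRST, LAST], or -1 if zero or more than one bit is set
     there.  */
  static int
  only_bit_between_u64 (uint64_t mask, unsigned first, unsigned last)
  {
    uint64_t window;

    if (last < first || first > 63)
      return -1;
    if (last > 63)
      last = 63;

    /* Keep only the bits in positions FIRST..LAST.  */
    window = mask >> first;
    if (last - first < 63)
      window &= ((uint64_t) 1 << (last - first + 1)) - 1;

    /* Exactly one bit is set iff the value is a nonzero power of two.  */
    if (window != 0 && (window & (window - 1)) == 0)
      return (int) first + __builtin_ctzll (window);
    return -1;
  }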
/* Fill the use_def_ref vector with values for the uses in USE_REC,
   taking reaching definitions info from LOCAL_RD.  TOP_FLAG says
   which artificial uses should be used, when USE_REC is an
   artificial use vector.  */
static void
process_uses (bitmap local_rd, df_ref *use_rec, int top_flag)
{
  df_ref use;
  while ((use = *use_rec++) != NULL)
    if (top_flag == (DF_REF_FLAGS (use) & DF_REF_AT_TOP))
      {
        unsigned int uregno = DF_REF_REGNO (use);
        unsigned int first = DF_DEFS_BEGIN (uregno);
        unsigned int last = first + DF_DEFS_COUNT (uregno) - 1;
        int defno = bitmap_only_bit_between (local_rd, first, last);
        df_ref def = (defno == -1) ? NULL : DF_DEFS_GET (defno);

        VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), def);
      }
}

/* Do dataflow analysis and use reaching definitions to build
   a vector holding the reaching definitions of uses that have a
   single RD.  */

static void
build_single_def_use_links (void)
{
  basic_block bb;
  bitmap local_rd = BITMAP_ALLOC (NULL);

  /* We use reaching definitions to compute our restricted use-def chains.  */
  df_set_flags (DF_EQ_NOTES);
  df_rd_add_problem ();
  df_analyze ();
  df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);

  use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
  VEC_safe_grow (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());

  FOR_EACH_BB (bb)
    {
      int bb_index = bb->index;
      struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
      rtx insn;

      bitmap_copy (local_rd, bb_info->in);
      process_uses (local_rd, df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
      df_rd_simulate_artificial_defs_at_top (bb, local_rd);

      FOR_BB_INSNS (bb, insn)
        if (INSN_P (insn))
          {
            unsigned int uid = INSN_UID (insn);
            process_uses (local_rd, DF_INSN_UID_USES (uid), 0);
            process_uses (local_rd, DF_INSN_UID_EQ_USES (uid), 0);
            df_rd_simulate_one_insn (bb, insn, local_rd);
          }

      process_uses (local_rd, df_get_artificial_uses (bb_index), 0);
    }

  BITMAP_FREE (local_rd);
}
/* Do not try to replace constant addresses or addresses of local and
argument slots. These MEM expressions are made only once and inserted
@@ -716,7 +821,8 @@ update_df (rtx insn, rtx *loc, df_ref *use_rec, enum df_ref_type type,
width, offset, mode);
/* Set up the use-def chain. */
df_chain_copy (new_use, DF_REF_CHAIN (orig_use));
gcc_assert (DF_REF_ID (new_use) == (int) VEC_length (df_ref, use_def_ref));
VEC_safe_push (df_ref, heap, use_def_ref, get_def_for_use (orig_use));
changed = true;
}
if (changed)
@@ -1035,7 +1141,6 @@ forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
static void
forward_propagate_into (df_ref use)
{
struct df_link *defs;
df_ref def;
rtx def_insn, def_set, use_insn;
rtx parent;
@@ -1046,11 +1151,9 @@ forward_propagate_into (df_ref use)
return;
/* Only consider uses that have a single definition. */
defs = DF_REF_CHAIN (use);
if (!defs || defs->next)
def = get_def_for_use (use);
if (!def)
return;
def = defs->ref;
if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
return;
if (DF_REF_IS_ARTIFICIAL (def))
@@ -1096,12 +1199,7 @@ fwprop_init (void)
insns (sadly) if we are not working in cfglayout mode. */
loop_optimizer_init (0);
/* Now set up the dataflow problem (we only want use-def chains) and
put the dataflow solver to work. */
df_set_flags (DF_EQ_NOTES);
df_chain_add_problem (DF_UD_CHAIN);
df_analyze ();
df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
build_single_def_use_links ();
df_set_flags (DF_DEFER_INSN_RESCAN);
}
@@ -1110,6 +1208,7 @@ fwprop_done (void)
{
loop_optimizer_finalize ();
VEC_free (df_ref, heap, use_def_ref);
free_dominance_info (CDI_DOMINATORS);
cleanup_cfg (0);
delete_trivially_dead_insns (get_insns (), max_reg_num ());
@@ -1187,8 +1286,6 @@ fwprop_addr (void)
/* Go through all the uses. update_df will create new ones at the
end, and we'll go through them as well. */
df_set_flags (DF_DEFER_INSN_RESCAN);
for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
{
df_ref use = DF_USES_GET (i);
@@ -848,6 +848,7 @@ decode_options (unsigned int argc, const char **argv)
#endif
flag_guess_branch_prob = opt1;
flag_cprop_registers = opt1;
flag_forward_propagate = opt1;
flag_if_conversion = opt1;
flag_if_conversion2 = opt1;
flag_ipa_pure_const = opt1;
@@ -873,7 +874,6 @@ decode_options (unsigned int argc, const char **argv)
flag_thread_jumps = opt2;
flag_crossjumping = opt2;
flag_optimize_sibling_calls = opt2;
flag_forward_propagate = opt2;
flag_cse_follow_jumps = opt2;
flag_gcse = opt2;
flag_expensive_optimizations = opt2;