Commit 3c3bd471, authored and committed by Bernd Schmidt

re PR rtl-optimization/44374 (Hoist same instructions in different branches)

	PR rtl-optimization/44374
	* basic-block.h (enum bb_flags): Add BB_MODIFIED.
	* df-core.c (df_set_bb_dirty): Set it.
	* ifcvt.c (find_memory): Remove function.
	(dead_or_predicable): Use can_move_insns_across.
	* df.h (can_move_insns_across): Declare function.
	* cfgcleanup.c (block_was_dirty): New static variable.
	(try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather
	than df_get_bb_dirty.
	(try_head_merge_bb): New static function.
	(try_optimize_cfg): Call it.  Call df_analyze if block_was_dirty
	is set.
	* df-problems.c: Include "target.h".
	(df_simulate_find_uses): New static function.
	(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
	(find_memory, find_memory_store): New static functions.
	(can_move_insns_across): New function.
	* Makefile.in (df-problems.o): Update dependencies.

testsuite/
	PR rtl-optimization/44374
	* gcc.target/arm/headmerge-1.c: New test.
	* gcc.target/arm/headmerge-2.c: New test.
	* gcc.target/i386/headmerge-1.c: New test.
	* gcc.target/i386/headmerge-2.c: New test.

From-SVN: r164552
parent 87350d4a
2010-09-23 Bernd Schmidt <bernds@codesourcery.com>
PR rtl-optimization/44374
* basic-block.h (enum bb_flags): Add BB_MODIFIED.
* df-core.c (df_set_bb_dirty): Set it.
* ifcvt.c (find_memory): Remove function.
(dead_or_predicable): Use can_move_insns_across.
* df.h (can_move_insns_across): Declare function.
* cfgcleanup.c (block_was_dirty): New static variable.
(try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather
than df_get_bb_dirty.
(try_head_merge_bb): New static function.
(try_optimize_cfg): Call it. Call df_analyze if block_was_dirty
is set.
* df-problems.c: Include "target.h".
(df_simulate_find_uses): New static function.
(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
(find_memory, find_memory_store): New static functions.
(can_move_insns_across): New function.
* Makefile.in (df-problems.o): Update dependencies.
2010-09-22 Eric Botcazou <ebotcazou@adacore.com> 2010-09-22 Eric Botcazou <ebotcazou@adacore.com>
PR java/44095 PR java/44095
...@@ -3171,7 +3171,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ ...@@ -3171,7 +3171,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \ $(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \ hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
$(TM_P_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h $(TM_P_H) $(TARGET_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \ insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \ hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
......
...@@ -246,7 +246,13 @@ enum bb_flags ...@@ -246,7 +246,13 @@ enum bb_flags
/* Set on blocks that cannot be threaded through. /* Set on blocks that cannot be threaded through.
Only used in cfgcleanup.c. */ Only used in cfgcleanup.c. */
BB_NONTHREADABLE_BLOCK = 1 << 11 BB_NONTHREADABLE_BLOCK = 1 << 11,
/* Set on blocks that were modified in some way. This bit is set in
df_set_bb_dirty, but not cleared by df_analyze, so it can be used
to test whether a block has been modified prior to a df_analyze
call. */
BB_MODIFIED = 1 << 12
}; };
/* Dummy flag for convenience in the hot/cold partitioning code. */ /* Dummy flag for convenience in the hot/cold partitioning code. */
......
...@@ -1413,6 +1413,7 @@ df_get_bb_dirty (basic_block bb) ...@@ -1413,6 +1413,7 @@ df_get_bb_dirty (basic_block bb)
void void
df_set_bb_dirty (basic_block bb) df_set_bb_dirty (basic_block bb)
{ {
bb->flags |= BB_MODIFIED;
if (df) if (df)
{ {
int p; int p;
......
...@@ -971,7 +971,9 @@ extern void df_simulate_one_insn_backwards (basic_block, rtx, bitmap); ...@@ -971,7 +971,9 @@ extern void df_simulate_one_insn_backwards (basic_block, rtx, bitmap);
extern void df_simulate_finalize_backwards (basic_block, bitmap); extern void df_simulate_finalize_backwards (basic_block, bitmap);
extern void df_simulate_initialize_forwards (basic_block, bitmap); extern void df_simulate_initialize_forwards (basic_block, bitmap);
extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap); extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap);
extern void simulate_backwards_to_point (basic_block, regset, rtx);
extern bool can_move_insns_across (rtx, rtx, rtx, rtx, basic_block, regset,
regset, rtx *);
/* Functions defined in df-scan.c. */ /* Functions defined in df-scan.c. */
extern void df_scan_alloc (bitmap); extern void df_scan_alloc (bitmap);
......
...@@ -101,7 +101,6 @@ static int noce_find_if_block (basic_block, edge, edge, int); ...@@ -101,7 +101,6 @@ static int noce_find_if_block (basic_block, edge, edge, int);
static int cond_exec_find_if_block (ce_if_block_t *); static int cond_exec_find_if_block (ce_if_block_t *);
static int find_if_case_1 (basic_block, edge, edge); static int find_if_case_1 (basic_block, edge, edge);
static int find_if_case_2 (basic_block, edge, edge); static int find_if_case_2 (basic_block, edge, edge);
static int find_memory (rtx *, void *);
static int dead_or_predicable (basic_block, basic_block, basic_block, static int dead_or_predicable (basic_block, basic_block, basic_block,
basic_block, int); basic_block, int);
static void noce_emit_move_insn (rtx, rtx); static void noce_emit_move_insn (rtx, rtx);
...@@ -3882,15 +3881,6 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge) ...@@ -3882,15 +3881,6 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge)
return TRUE; return TRUE;
} }
/* A subroutine of dead_or_predicable called through for_each_rtx.
Return 1 if a memory is found. */
static int
find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
{
return MEM_P (*px);
}
/* Used by the code above to perform the actual rtl transformations. /* Used by the code above to perform the actual rtl transformations.
Return TRUE if successful. Return TRUE if successful.
...@@ -3992,131 +3982,38 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, ...@@ -3992,131 +3982,38 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
earliest = jump; earliest = jump;
} }
#endif #endif
/* If we allocated new pseudos (e.g. in the conditional move
expander called from noce_emit_cmove), we must resize the
array first. */
if (max_regno < max_reg_num ())
max_regno = max_reg_num ();
/* Try the NCE path if the CE path did not result in any changes. */ /* Try the NCE path if the CE path did not result in any changes. */
if (n_validated_changes == 0) if (n_validated_changes == 0)
{ {
rtx cond;
regset live;
bool success;
/* In the non-conditional execution case, we have to verify that there /* In the non-conditional execution case, we have to verify that there
are no trapping operations, no calls, no references to memory, and are no trapping operations, no calls, no references to memory, and
that any registers modified are dead at the branch site. */ that any registers modified are dead at the branch site. */
rtx insn, cond, prev; if (!any_condjump_p (jump))
bitmap merge_set, merge_set_noclobber, test_live, test_set;
unsigned i, fail = 0;
bitmap_iterator bi;
/* Check for no calls or trapping operations. */
for (insn = head; ; insn = NEXT_INSN (insn))
{
if (CALL_P (insn))
return FALSE;
if (NONDEBUG_INSN_P (insn))
{
if (may_trap_p (PATTERN (insn)))
return FALSE;
/* ??? Even non-trapping memories such as stack frame
references must be avoided. For stores, we collect
no lifetime info; for reads, we'd have to assert
true_dependence false against every store in the
TEST range. */
if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
return FALSE;
}
if (insn == end)
break;
}
if (! any_condjump_p (jump))
return FALSE; return FALSE;
/* Find the extent of the conditional. */ /* Find the extent of the conditional. */
cond = noce_get_condition (jump, &earliest, false); cond = noce_get_condition (jump, &earliest, false);
if (! cond) if (!cond)
return FALSE; return FALSE;
/* Collect: live = BITMAP_ALLOC (&reg_obstack);
MERGE_SET = set of registers set in MERGE_BB simulate_backwards_to_point (merge_bb, live, end);
MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers success = can_move_insns_across (head, end, earliest, jump,
that are really set, not just clobbered. merge_bb, live,
TEST_LIVE = set of registers live at EARLIEST df_get_live_in (other_bb), NULL);
TEST_SET = set of registers set between EARLIEST and the BITMAP_FREE (live);
end of the block. */ if (!success)
merge_set = BITMAP_ALLOC (&reg_obstack);
merge_set_noclobber = BITMAP_ALLOC (&reg_obstack);
test_live = BITMAP_ALLOC (&reg_obstack);
test_set = BITMAP_ALLOC (&reg_obstack);
/* ??? bb->local_set is only valid during calculate_global_regs_live,
so we must recompute usage for MERGE_BB. Not so bad, I suppose,
since we've already asserted that MERGE_BB is small. */
/* If we allocated new pseudos (e.g. in the conditional move
expander called from noce_emit_cmove), we must resize the
array first. */
if (max_regno < max_reg_num ())
max_regno = max_reg_num ();
FOR_BB_INSNS (merge_bb, insn)
{
if (NONDEBUG_INSN_P (insn))
{
df_simulate_find_defs (insn, merge_set);
df_simulate_find_noclobber_defs (insn, merge_set_noclobber);
}
}
/* For small register class machines, don't lengthen lifetimes of
hard registers before reload. */
if (! reload_completed
&& targetm.small_register_classes_for_mode_p (VOIDmode))
{
EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi)
{
if (i < FIRST_PSEUDO_REGISTER
&& ! fixed_regs[i]
&& ! global_regs[i])
fail = 1;
}
}
/* For TEST, we're interested in a range of insns, not a whole block.
Moreover, we're interested in the insns live from OTHER_BB. */
/* The loop below takes the set of live registers
after JUMP, and calculates the live set before EARLIEST. */
bitmap_copy (test_live, df_get_live_in (other_bb));
df_simulate_initialize_backwards (test_bb, test_live);
for (insn = jump; ; insn = prev)
{
if (INSN_P (insn))
{
df_simulate_find_defs (insn, test_set);
df_simulate_one_insn_backwards (test_bb, insn, test_live);
}
prev = PREV_INSN (insn);
if (insn == earliest)
break;
}
/* We can perform the transformation if
MERGE_SET_NOCLOBBER & TEST_SET
and
MERGE_SET & TEST_LIVE)
and
TEST_SET & DF_LIVE_IN (merge_bb)
are empty. */
if (bitmap_intersect_p (test_set, merge_set_noclobber)
|| bitmap_intersect_p (test_live, merge_set)
|| bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
fail = 1;
BITMAP_FREE (merge_set_noclobber);
BITMAP_FREE (merge_set);
BITMAP_FREE (test_live);
BITMAP_FREE (test_set);
if (fail)
return FALSE; return FALSE;
} }
......
2010-09-23 Bernd Schmidt <bernds@codesourcery.com>
PR rtl-optimization/44374
* gcc.target/arm/headmerge-1.c: New test.
* gcc.target/arm/headmerge-2.c: New test.
* gcc.target/i386/headmerge-1.c: New test.
* gcc.target/i386/headmerge-2.c: New test.
2010-09-23 Daniel Kraft <d@domob.eu> 2010-09-23 Daniel Kraft <d@domob.eu>
PR fortran/38936 PR fortran/38936
......
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "#120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
/* Test body for PR rtl-optimization/44374 (hoisting identical
   instructions out of different branches).  Both arms of the branch on
   Y call a different external function, but with the same constant
   argument, 120.  The dg-final directive of this test expects the
   scanned pattern to occur exactly once in the assembler output, which
   presumably requires the argument setup to be merged ahead of the
   branch.  X is not used.  Do not restructure the if/else: the source
   shape is what the test exercises.  */
void t (int x, int y)
{
if (y < 5)
foo1 (120);
else
foo2 (120);
}
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
extern void foo3 (int);
extern void foo4 (int);
extern void foo5 (int);
extern void foo6 (int);
/* Multi-way variant of the head-merging test for
   PR rtl-optimization/44374.  Y selects one of six external functions
   (cases 1, 5, 7, 10, 13 and the default), and every arm passes the same
   constant argument, 120.  The dg-final directive of this test expects
   "120" to occur exactly once in the assembler output, which presumably
   requires the common argument setup to be hoisted out of all the switch
   arms.  X is not used.  Do not restructure the switch: the source shape
   is what the test exercises.  */
void t (int x, int y)
{
switch (y)
{
case 1:
foo1 (120);
break;
case 5:
foo2 (120);
break;
case 7:
foo3 (120);
break;
case 10:
foo4 (120);
break;
case 13:
foo5 (120);
break;
default:
/* Every other value of Y also passes the same constant.  */
foo6 (120);
break;
}
}
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
/* Test body for PR rtl-optimization/44374 (hoisting identical
   instructions out of different branches).  Both arms of the branch on
   Y call a different external function, but with the same constant
   argument, 120.  The dg-final directive of this test expects "120" to
   occur exactly once in the assembler output, which presumably requires
   the argument setup to be merged ahead of the branch.  X is not used.
   Do not restructure the if/else: the source shape is what the test
   exercises.  */
void t (int x, int y)
{
if (y < 5)
foo1 (120);
else
foo2 (120);
}
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
extern void foo3 (int);
extern void foo4 (int);
extern void foo5 (int);
extern void foo6 (int);
/* Multi-way variant of the head-merging test for
   PR rtl-optimization/44374.  Y selects one of six external functions
   (cases 1, 5, 7, 10, 13 and the default), and every arm passes the same
   constant argument, 120.  The dg-final directive of this test expects
   "120" to occur exactly once in the assembler output, which presumably
   requires the common argument setup to be hoisted out of all the switch
   arms.  X is not used.  Do not restructure the switch: the source shape
   is what the test exercises.  */
void t (int x, int y)
{
switch (y)
{
case 1:
foo1 (120);
break;
case 5:
foo2 (120);
break;
case 7:
foo3 (120);
break;
case 10:
foo4 (120);
break;
case 13:
foo5 (120);
break;
default:
/* Every other value of Y also passes the same constant.  */
foo6 (120);
break;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment