Commit ebe8f322 by James Greenhalgh Committed by James Greenhalgh

[Patch 2/2 ifcvt costs] Introduce a new cost model for ifcvt.

gcc/

	* ifcvt.c (noce_if_info): New fields: speed_p, original_cost,
	max_seq_cost.  Removed fields: then_cost, else_cost, branch_cost.
	(noce_conversion_profitable_p): New.
	(noce_try_store_flag_constants): Use it.
	(noce_try_addcc): Likewise.
	(noce_try_store_flag_mask): Likewise.
	(noce_try_cmove): Likewise.
	(noce_try_cmove_arith): Likewise.
	(bb_valid_for_noce_process_p): Add to the cost parameter rather than
	overwriting it.
	(noce_convert_multiple_sets): Move cost model to here, from...
	(bb_ok_for_noce_convert_multiple_sets) ...here.
	(noce_process_if_block): Update calls for above changes.
	(noce_find_if_block): Record new noce_if_info parameters.

gcc/testsuite/

	* gcc.dg/ifcvt-2.c: Use parameter to guide if-conversion heuristics.
	* gcc.dg/ifcvt-3.c: Use parameter to guide if-conversion heuristics.
	* gcc.dg/pr68435.c: Use parameter to guide if-conversion heuristics.
	* gcc.dg/ifcvt-4.c: Use parameter to guide if-conversion heuristics.
	* gcc.dg/ifcvt-5.c: Use parameter to guide if-conversion heuristics.

From-SVN: r238594
parent e914c11a
2016-07-21 James Greenhalgh <james.greenhalgh@arm.com>
* ifcvt.c (noce_if_info): New fields: speed_p, original_cost,
max_seq_cost. Removed fields: then_cost, else_cost, branch_cost.
(noce_conversion_profitable_p): New.
(noce_try_store_flag_constants): Use it.
(noce_try_addcc): Likewise.
(noce_try_store_flag_mask): Likewise.
(noce_try_cmove): Likewise.
(noce_try_cmove_arith): Likewise.
(bb_valid_for_noce_process_p): Add to the cost parameter rather than
overwriting it.
(noce_convert_multiple_sets): Move cost model to here, from...
(bb_ok_for_noce_convert_multiple_sets) ...here.
(noce_process_if_block): Update calls for above changes.
(noce_find_if_block): Record new noce_if_info parameters.
2016-07-21 James Greenhalgh <james.greenhalgh@arm.com>
* target.def (max_noce_ifcvt_seq_cost): New.
* doc/tm.texi.in (TARGET_MAX_NOCE_IFCVT_SEQ_COST): Document it.
* doc/tm.texi: Regenerate.
......
......@@ -807,12 +807,17 @@ struct noce_if_info
bool then_simple;
bool else_simple;
/* The total rtx cost of the instructions in then_bb and else_bb. */
unsigned int then_cost;
unsigned int else_cost;
/* True if we're optimisizing the control block for speed, false if
we're optimizing for size. */
bool speed_p;
/* Estimated cost of the particular branch instruction. */
unsigned int branch_cost;
/* The combined cost of COND, JUMP and the costs for THEN_BB and
ELSE_BB. */
unsigned int original_cost;
/* Maximum permissible cost for the unconditional sequence we should
generate to replace this branch. */
unsigned int max_seq_cost;
/* The name of the noce transform that succeeded in if-converting
this structure. Used for debugging. */
......@@ -835,6 +840,25 @@ static int noce_try_minmax (struct noce_if_info *);
static int noce_try_abs (struct noce_if_info *);
static int noce_try_sign_mask (struct noce_if_info *);
/* Return TRUE if SEQ is a good candidate as a replacement for the
if-convertible sequence described in IF_INFO. */
inline static bool
noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
bool speed_p = if_info->speed_p;
/* Cost up the new sequence. */
unsigned int cost = seq_cost (seq, speed_p);
/* When compiling for size, we can make a reasonably accurately guess
at the size growth. */
if (!speed_p)
return cost <= if_info->original_cost;
else
return cost <= if_info->max_seq_cost;
}
/* Helper function for noce_try_store_flag*. */
static rtx
......@@ -1319,8 +1343,7 @@ noce_try_store_flag_constants (struct noce_if_info *if_info)
registers where we handle overlap below. */
&& (REG_P (XEXP (a, 0))
|| (noce_operand_ok (XEXP (a, 0))
&& ! reg_overlap_mentioned_p (if_info->x, XEXP (a, 0))))
&& if_info->branch_cost >= 2)
&& ! reg_overlap_mentioned_p (if_info->x, XEXP (a, 0)))))
{
common = XEXP (a, 0);
a = XEXP (a, 1);
......@@ -1393,22 +1416,24 @@ noce_try_store_flag_constants (struct noce_if_info *if_info)
else
gcc_unreachable ();
}
/* Is this (cond) ? 2^n : 0? */
else if (ifalse == 0 && exact_log2 (itrue) >= 0
&& (STORE_FLAG_VALUE == 1
|| if_info->branch_cost >= 2))
&& STORE_FLAG_VALUE == 1)
normalize = 1;
/* Is this (cond) ? 0 : 2^n? */
else if (itrue == 0 && exact_log2 (ifalse) >= 0 && can_reverse
&& (STORE_FLAG_VALUE == 1 || if_info->branch_cost >= 2))
&& STORE_FLAG_VALUE == 1)
{
normalize = 1;
reversep = true;
}
/* Is this (cond) ? -1 : x? */
else if (itrue == -1
&& (STORE_FLAG_VALUE == -1
|| if_info->branch_cost >= 2))
&& STORE_FLAG_VALUE == -1)
normalize = -1;
/* Is this (cond) ? x : -1? */
else if (ifalse == -1 && can_reverse
&& (STORE_FLAG_VALUE == -1 || if_info->branch_cost >= 2))
&& STORE_FLAG_VALUE == -1)
{
normalize = -1;
reversep = true;
......@@ -1497,7 +1522,7 @@ noce_try_store_flag_constants (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
if (!seq)
if (!seq || !noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
......@@ -1551,7 +1576,7 @@ noce_try_addcc (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
if (!seq)
if (!seq || !noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
......@@ -1564,10 +1589,10 @@ noce_try_addcc (struct noce_if_info *if_info)
}
/* If that fails, construct conditional increment or decrement using
setcc. */
if (if_info->branch_cost >= 2
&& (XEXP (if_info->a, 1) == const1_rtx
|| XEXP (if_info->a, 1) == constm1_rtx))
setcc. We're changing a branch and an increment to a comparison and
an ADD/SUB. */
if (XEXP (if_info->a, 1) == const1_rtx
|| XEXP (if_info->a, 1) == constm1_rtx)
{
start_sequence ();
if (STORE_FLAG_VALUE == INTVAL (XEXP (if_info->a, 1)))
......@@ -1593,7 +1618,7 @@ noce_try_addcc (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
if (!seq)
if (!seq || !noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
......@@ -1621,15 +1646,14 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
return FALSE;
reversep = 0;
if ((if_info->branch_cost >= 2
|| STORE_FLAG_VALUE == -1)
&& ((if_info->a == const0_rtx
&& rtx_equal_p (if_info->b, if_info->x))
|| ((reversep = (reversed_comparison_code (if_info->cond,
if_info->jump)
!= UNKNOWN))
&& if_info->b == const0_rtx
&& rtx_equal_p (if_info->a, if_info->x))))
if ((if_info->a == const0_rtx
&& rtx_equal_p (if_info->b, if_info->x))
|| ((reversep = (reversed_comparison_code (if_info->cond,
if_info->jump)
!= UNKNOWN))
&& if_info->b == const0_rtx
&& rtx_equal_p (if_info->a, if_info->x)))
{
start_sequence ();
target = noce_emit_store_flag (if_info,
......@@ -1643,22 +1667,11 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
if (target)
{
int old_cost, new_cost, insn_cost;
int speed_p;
if (target != if_info->x)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
if (!seq)
return FALSE;
speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (if_info->insn_a));
insn_cost = insn_rtx_cost (PATTERN (if_info->insn_a), speed_p);
old_cost = COSTS_N_INSNS (if_info->branch_cost) + insn_cost;
new_cost = seq_cost (seq, speed_p);
if (new_cost > old_cost)
if (!seq || !noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
......@@ -1827,9 +1840,7 @@ noce_try_cmove (struct noce_if_info *if_info)
we don't know about, so give them a chance before trying this
approach. */
else if (!targetm.have_conditional_execution ()
&& CONST_INT_P (if_info->a) && CONST_INT_P (if_info->b)
&& ((if_info->branch_cost >= 2 && STORE_FLAG_VALUE == -1)
|| if_info->branch_cost >= 3))
&& CONST_INT_P (if_info->a) && CONST_INT_P (if_info->b))
{
machine_mode mode = GET_MODE (if_info->x);
HOST_WIDE_INT ifalse = INTVAL (if_info->a);
......@@ -1865,7 +1876,7 @@ noce_try_cmove (struct noce_if_info *if_info)
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
if (!seq)
if (!seq || !noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
......@@ -2078,11 +2089,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
conditional on their addresses followed by a load. Don't do this
early because it'll screw alias analysis. Note that we've
already checked for no side effects. */
/* ??? FIXME: Magic number 5. */
if (cse_not_expected
&& MEM_P (a) && MEM_P (b)
&& MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b)
&& if_info->branch_cost >= 5)
&& MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b))
{
machine_mode address_mode = get_address_mode (a);
......@@ -2114,23 +2123,6 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
if (!can_conditionally_move_p (x_mode))
return FALSE;
unsigned int then_cost;
unsigned int else_cost;
if (insn_a)
then_cost = if_info->then_cost;
else
then_cost = 0;
if (insn_b)
else_cost = if_info->else_cost;
else
else_cost = 0;
/* We're going to execute one of the basic blocks anyway, so
bail out if the most expensive of the two blocks is unacceptable. */
if (MAX (then_cost, else_cost) > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
/* Possibly rearrange operands to make things come out more natural. */
if (reversed_comparison_code (if_info->cond, if_info->jump) != UNKNOWN)
{
......@@ -2319,7 +2311,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
noce_emit_move_insn (x, target);
ifcvt_seq = end_ifcvt_sequence (if_info);
if (!ifcvt_seq)
if (!ifcvt_seq || !noce_conversion_profitable_p (ifcvt_seq, if_info))
return FALSE;
emit_insn_before_setloc (ifcvt_seq, if_info->jump,
......@@ -2805,7 +2797,7 @@ noce_try_sign_mask (struct noce_if_info *if_info)
&& (if_info->insn_b == NULL_RTX
|| BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb));
if (!(t_unconditional
|| (set_src_cost (t, mode, optimize_bb_for_speed_p (if_info->test_bb))
|| (set_src_cost (t, mode, if_info->speed_p)
< COSTS_N_INSNS (2))))
return FALSE;
......@@ -3034,8 +3026,8 @@ contains_mem_rtx_p (rtx x)
x := a and all previous computations
in TEST_BB don't produce any values that are live after TEST_BB.
In other words, all the insns in TEST_BB are there only
to compute a value for x. Put the rtx cost of the insns
in TEST_BB into COST. Record whether TEST_BB is a single simple
to compute a value for x. Add the rtx cost of the insns
in TEST_BB to COST. Record whether TEST_BB is a single simple
set instruction in SIMPLE_P. */
static bool
......@@ -3067,7 +3059,7 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
if (first_insn == last_insn)
{
*simple_p = noce_operand_ok (SET_DEST (first_set));
*cost = insn_rtx_cost (first_set, speed_p);
*cost += insn_rtx_cost (first_set, speed_p);
return *simple_p;
}
......@@ -3114,7 +3106,7 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
goto free_bitmap_and_fail;
BITMAP_FREE (test_bb_temps);
*cost = potential_cost;
*cost += potential_cost;
*simple_p = false;
return true;
......@@ -3290,9 +3282,15 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
for (int i = 0; i < count; i++)
noce_emit_move_insn (targets[i], temporaries[i]);
/* Actually emit the sequence. */
/* Actually emit the sequence if it isn't too expensive. */
rtx_insn *seq = get_insns ();
if (!noce_conversion_profitable_p (seq, if_info))
{
end_sequence ();
return FALSE;
}
for (insn = seq; insn; insn = NEXT_INSN (insn))
set_used_flags (insn);
......@@ -3342,22 +3340,16 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
/* Return true iff basic block TEST_BB is comprised of only
(SET (REG) (REG)) insns suitable for conversion to a series
of conditional moves. FORNOW: Use II to find the expected cost of
the branch into/over TEST_BB.
TODO: This creates an implicit "magic number" for branch_cost.
II->branch_cost now guides the maximum number of set instructions in
a basic block which is considered profitable to completely
if-convert. */
of conditional moves. Also check that we have more than one set
(other routines can handle a single set better than we would), and
fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets. */
static bool
bb_ok_for_noce_convert_multiple_sets (basic_block test_bb,
struct noce_if_info *ii)
bb_ok_for_noce_convert_multiple_sets (basic_block test_bb)
{
rtx_insn *insn;
unsigned count = 0;
unsigned param = PARAM_VALUE (PARAM_MAX_RTL_IF_CONVERSION_INSNS);
unsigned limit = MIN (ii->branch_cost, param);
FOR_BB_INSNS (test_bb, insn)
{
......@@ -3393,14 +3385,15 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb,
if (!can_conditionally_move_p (GET_MODE (dest)))
return false;
/* FORNOW: Our cost model is a count of the number of instructions we
would if-convert. This is suboptimal, and should be improved as part
of a wider rework of branch_cost. */
if (++count > limit)
return false;
count++;
}
return count > 1;
/* If we would only put out one conditional move, the other strategies
this pass tries are better optimized and will be more appropriate.
Some targets want to strictly limit the number of conditional moves
that are emitted, they set this through PARAM, we need to respect
that. */
return count > 1 && count <= param;
}
/* Given a simple IF-THEN-JOIN or IF-THEN-ELSE-JOIN block, attempt to convert
......@@ -3436,7 +3429,7 @@ noce_process_if_block (struct noce_if_info *if_info)
if (!else_bb
&& HAVE_conditional_move
&& !HAVE_cc0
&& bb_ok_for_noce_convert_multiple_sets (then_bb, if_info))
&& bb_ok_for_noce_convert_multiple_sets (then_bb))
{
if (noce_convert_multiple_sets (if_info))
{
......@@ -3447,12 +3440,12 @@ noce_process_if_block (struct noce_if_info *if_info)
}
}
if (! bb_valid_for_noce_process_p (then_bb, cond, &if_info->then_cost,
if (! bb_valid_for_noce_process_p (then_bb, cond, &if_info->original_cost,
&if_info->then_simple))
return false;
if (else_bb
&& ! bb_valid_for_noce_process_p (else_bb, cond, &if_info->else_cost,
&& ! bb_valid_for_noce_process_p (else_bb, cond, &if_info->original_cost,
&if_info->else_simple))
return false;
......@@ -3983,6 +3976,7 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,
rtx cond;
rtx_insn *cond_earliest;
struct noce_if_info if_info;
bool speed_p = optimize_bb_for_speed_p (test_bb);
/* We only ever should get here before reload. */
gcc_assert (!reload_completed);
......@@ -4074,8 +4068,16 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,
if_info.cond_earliest = cond_earliest;
if_info.jump = jump;
if_info.then_else_reversed = then_else_reversed;
if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb),
predictable_edge_p (then_edge));
if_info.speed_p = speed_p;
if_info.max_seq_cost
= targetm.max_noce_ifcvt_seq_cost (then_edge);
/* We'll add in the cost of THEN_BB and ELSE_BB later, when we check
that they are valid to transform. We can't easily get back to the insn
for COND (and it may not exist if we had to canonicalize to get COND),
and jump_insns are always given a cost of 1 by seq_cost, so treat
both instructions as having cost COSTS_N_INSNS (1). */
if_info.original_cost = COSTS_N_INSNS (2);
/* Do the real work. */
......
2016-07-21 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.dg/ifcvt-2.c: Use parameter to guide if-conversion heuristics.
* gcc.dg/ifcvt-3.c: Use parameter to guide if-conversion heuristics.
* gcc.dg/pr68435.c: Use parameter to guide if-conversion heuristics.
* gcc.dg/ifcvt-4.c: Use parameter to guide if-conversion heuristics.
* gcc.dg/ifcvt-5.c: Use parameter to guide if-conversion heuristics.
2016-07-21 Richard Biener <rguenther@suse.de>
PR tree-optimization/71947
......
/* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */
/* { dg-options "-fdump-rtl-ce1 -O2" } */
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100" } */
typedef unsigned char uint8_t;
......
/* { dg-do compile { target { { aarch64*-*-* i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-fdump-rtl-ce1 -O2" } */
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100" } */
typedef long long s64;
......
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3" } */
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3 --param max-rtl-if-conversion-unpredictable-cost=100" } */
/* { dg-additional-options "-misel" { target { powerpc*-*-* } } } */
/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* hppa*64*-*-* visium-*-*" } } */
......
/* Check that multi-insn if-conversion is not done if the override
parameter would not allow it. */
parameter would not allow it. Set the cost parameter very high
to ensure that the limiting factor is actually the count parameter. */
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=1" } */
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=1 --param max-rtl-if-conversion-unpredictable-cost=200" } */
typedef int word __attribute__((mode(word)));
......
/* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */
/* { dg-options "-fdump-rtl-ce1 -O2 -w" } */
/* { dg-options "-fdump-rtl-ce1 -O2 -w --param max-rtl-if-conversion-unpredictable-cost=100" } */
typedef struct cpp_reader cpp_reader;
enum cpp_ttype
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment